From 52d802eab37caf7e34002ab7c023cae5d2e19814 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 11:06:01 +1100 Subject: [PATCH 01/43] common plotting util that reports can leverage --- modules/local/generate_report/plot_utils.py | 324 ++++++++++++++++++++ 1 file changed, 324 insertions(+) create mode 100644 modules/local/generate_report/plot_utils.py diff --git a/modules/local/generate_report/plot_utils.py b/modules/local/generate_report/plot_utils.py new file mode 100644 index 000000000..6b56726bc --- /dev/null +++ b/modules/local/generate_report/plot_utils.py @@ -0,0 +1,324 @@ +from collections import OrderedDict +import plotly.graph_objects as go +from Bio import PDB +import matplotlib.pyplot as plt +import numpy as np +import os + +def reset_residue_numbers(structure): + """ + Resets residue numbering in a PDB file, because ESMFold starts + and increment only when encountering a new residue. + """ + if str(structure).endswith(".pdb"): + parser = PDB.PDBParser(QUIET=True) + elif str(structure).endswith(".cif"): + parser = PDB.MMCIFParser(QUIET=True) + else: + print(f"{structure} is neither a PDB or mmCIF file!") + return + + structure = parser.get_structure("structure", structure) + + for model in structure: + for idx, residue in enumerate(model.get_residues(), start=1): + # Do a swap in place to renumber the residue, the other entries in the tuple can stay the same + # See: https://biopython.org/docs/1.76/api/Bio.PDB.Chain.html#Bio.PDB.Chain.Chain.__getitem__ + het_atom, _, insertion_code = residue.get_id() + residue.id = (het_atom, idx, insertion_code) + + io = PDB.PDBIO() + io.set_structure(structure) + + return structure + +# TODO: Barcelona team to implement AF3 +def sort_structures_by_rank(structures, prog): + """ + Sorts a list of structures based on their rank. Needs to handle different program naming + """ + if prog == "alphafold2": + # AlphaFold2 structures are named with [run]/ranked_[rank].pdb + sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).replace('ranked_', '').split('.')[0])) + if prog == "colabfold": + # ColabFold structures are named with [run]_unrelaxed_rank_[rank]_alphafold2_ptm_model_[num]_seed_[seed].pdb + sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).split('_')[3])) + if prog == "helixfold3": + # HelixFold3 structures are named with .../[run]/[run]-rank[rank]/predicted_structure.pdb + sorted_structures = sorted(structures, key=lambda x: int(os.path.dirname(x).split('rank')[-1])) + if prog == "esmfold" or "rosettafold-all-atom": + # ESMFold and RoseTTAFold only produce one structure + sorted_structures = structures[0] + if prog == "boltz1": + # Boltz1 structures are named with ..._model_[diffusion_samples-1].[pdb|cif] + sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).split('_model_')[-1])) + else: + print(f"Warning: Sorting not implemented for {prog}. Using original order.") + return structures + + return sorted_structures + +def align_structures(structures): + + if not structures: + raise ValueError("No structures provided for alignment.") + + if structures[0].endswith(".pdb"): + parser = PDB.PDBParser(QUIET=True) + elif structures[0].endswith(".cif"): + parser = PDB.MMCIFParser(QUIET=True) + else: + raise ValueError(f"{structure} is neither a PDB or mmCIF file!") + + parsed_structures = [parser.get_structure(f"structure-{idx}", structure) for idx, structure in enumerate(structures)] + ref_structure = parsed_structures[0] + + def get_atom_ids(structure): + # Note: this is a *set* of atom_ids due to the {} surrounding the comprehension + return {(atom.get_parent().get_id(), atom.name) for atom in structure.get_atoms()} + + # TODO: do we want to raise and error if the structures are not identical atomically, or keep the ability to sub-align? + # Update the atoms shared between structures with progressive intersections + common_atoms = get_atom_ids(ref_structure) + for structure in parsed_structures[1:]: + common_atoms.intersection_update(get_atom_ids(structure)) + + if not common_atoms: + raise ValueError("No common atoms found between structures.") + + def extract_atoms(structure, atom_ids): + # Note: this comprehension returns an atom *object* for each atom in the structure + return {atom for atom in structure.get_atoms() if (atom.get_parent().get_id(), atom.name) in atom_ids} + + ref_atoms = extract_atoms(ref_structure, common_atoms) + + # The aligned structures will be the parsed structures aligned to the common atoms of the reference structure + super_imposer = PDB.Superimposer() + aligned_structures = [] + for idx, structure in enumerate(parsed_structures): + # The reference structure doesn't need to be aligned so can be skipped + if idx == 0: + aligned_structures.append(structure) + continue + + target_atoms = extract_atoms(structure, common_atoms) + super_imposer.set_atoms(ref_atoms, target_atoms) + super_imposer.apply(structure.get_atoms()) + + io = PDB.PDBIO() + io.set_structure(structure) + aligned_structures.append(structure) + + # Technically, parsed_structures now also points to the same aligned structures, but I've kept for readability + return aligned_structures + +def plddt_from_struct_b_factor(structure): + """ + Uses the BioPython PDB package to extract residue pLDDT values from the b-factor column. Iterates over PDB objects rather than processes raw file + """ + if str(structure).endswith(".pdb"): + parser = PDB.PDBParser(QUIET=True) + structure = parser.get_structure(id=id, file=structure) + elif str(structure).endswith(".cif"): + parser = PDB.MMCIFParser(QUIET=True) + structure = parser.get_structure(structure_id=id, filename=structure) + else: + print(f"{structure} is neither a PDB or mmCIF file!") + + res_list = [] + res_plddts = [] + plddt_tot = 0 + + for model in structure: + for chain in model: + chain_res_list = chain.get_unpacked_list() + res_list.extend(chain_res_list) + for residue in chain: + atom_list = residue.get_unpacked_list() + atom_plddt_tot = 0 + for atom in residue: # ESMFold and others have separate atom-wise values, so doing atom-wise to cover that and residue-wise + atom_plddt = atom.get_bfactor() + atom_plddt_tot += atom_plddt + + res_plddt = float(atom_plddt_tot / len(atom_list)) + + if (res_plddt < 1): # RFAA the multiplication of mean isn't failing. Anyway covering to a [0,100] range for any structure file1 + res_plddt *= 100 + + res_plddts.append(res_plddt) + plddt_tot += res_plddt + + res_plddts = np.array(res_plddts) + res_plddts = np.round(res_plddts, 2) + + return res_plddts + +def generate_plddt_plot(structures): + """ + Generate a Plotly figure for predicted LDDT per position for given structures. + + Args: + structures (list): List of structure file paths. + + Returns: + go.Figure: Plotly figure object with pLDDT data. + """ + plddt_per_struct = OrderedDict() + + for struct in structures: + plddt_per_struct[struct] = plddt_from_struct_b_factor(struct) + + fig = go.Figure() + + for idx, (struct, plddts) in enumerate(plddt_per_struct.items()): + fig.add_trace( + go.Scatter( + x=list(range(len(plddts))), + y=plddts, + mode="lines", + name=f"rank-{idx}", + text=[f"({idx}, {value:.2f})" for idx, value in enumerate(plddts)], + hoverinfo="text", + ) + ) + fig.update_layout( + title=dict(text="Predicted LDDT per position", x=0.5, xanchor="center"), + xaxis=dict( + title="Positions", showline=True, linecolor="black", gridcolor="WhiteSmoke" + ), + yaxis=dict( + title="Predicted LDDT", + range=[0, 100], + showline=True, + linecolor="black", + gridcolor="WhiteSmoke", + ), + legend=dict( + yanchor="bottom", y=0.02, xanchor="right", x=1, bordercolor="Black", borderwidth=1 + ), + plot_bgcolor="white", + width=600, + height=600, + ) + + return fig + +def process_msas(msa_path): + msa = np.loadtxt(msa_path, dtype=int) + + query_sequence = msa[0] + seqid_match = np.mean(msa == query_sequence, axis=1) + + # Sort sequences by sequence identity + seqid_sort_indices = np.argsort(seqid_match) + sorted_msa = msa[seqid_sort_indices] + sorted_seqid = seqid_match[seqid_sort_indices] + + non_gaps_msas = np.where(sorted_msa != 21, 1.0, np.nan) + + # Scale non-gap positions by sequence identity + final_msas = non_gaps_msas * sorted_seqid[:, None] + + return final_msas, non_gaps_msas + +def generate_sequence_coverage_plot(msa_path, out_dir, name, save_image=True): + final_msas, non_gaps_msas = process_msas(msa_path) + # + seq_depth_counts = np.sum(~np.isnan(non_gaps_msas), axis=0) + + # TODO: don't have a seperate save image plot and an HTML plotly ploy + # ################################################################## + # Plot the sequence coverage with matplotlib and save as image + # ################################################################## + if save_image: + image_path = f"{out_dir}/{name+('_' if name else '')}seq_coverage.png" + plt.figure(figsize=(14, 14), dpi=100) + plt.title("Sequence coverage", fontsize=30, pad=36) + plt.imshow( + final_msas, + interpolation="nearest", + aspect="auto", + cmap="rainbow_r", + vmin=0, + vmax=1, + origin="lower", + ) + + + plt.plot(seq_depth_counts, color="black") + plt.xlim(-0.5, len(final_msas[0]) - 0.5) + plt.ylim(-0.5, len(final_msas) - 0.5) + + plt.tick_params(axis="both", which="both", labelsize=18) + + cbar = plt.colorbar() + cbar.set_label("Sequence identity to query", fontsize=24, labelpad=24) + cbar.ax.tick_params(labelsize=18) + plt.xlabel("Positions", fontsize=24, labelpad=24) + plt.ylabel("Sequences", fontsize=24, labelpad=36) + plt.savefig(image_path) + + # ################################################################## + # Interactive HTML plot of sequence coverage + fig = go.Figure() + fig.add_trace( + go.Heatmap( + z=final_msas, + colorscale="Rainbow_r", + zmin=0, + zmax=1, + colorbar={"title": 'Your title'} + ) + ) + # Add black line for sequence coverage depth + fig.add_trace( + go.Scatter( + x=list(range(len(seq_depth_counts))), + y=seq_depth_counts, + mode="lines", + line=dict(color="black", width=2), + name="Coverage Depth", + ) + ) + fig.update_layout( + title=dict(text="Sequence coverage", x=0.5, xanchor="center"), + xaxis_title="Positions", yaxis_title="Sequences", + ) + + if save_image: + return fig, image_path + else: + return fig + +def generate_pae_plot(pae_path, out_dir, name, save_image=True): + """ + Generate a Plotly heatmap for Predicted Aligned Error (PAE) data. + + Args: + pae (2D array): The PAE matrix. + Returns: + fig: A Plotly figure object of the PAE heatmap in green color scale + """ + pae = np.genfromtxt(pae_path, delimiter="\t") + max_pae = np.max(pae) + fig = go.Figure() + + # Add heatmap + fig.add_trace( + go.Heatmap( + z=pae, + colorscale="Greens_r", + zmin=0, + zmax=max_pae, + ) + ) + fig.update_layout( + xaxis=dict(title="Scored Residue"), + yaxis=dict(title="Aligned Residue"), + ) + + if save_image: + image_path = f"{out_dir}/{name+('_' if name else '')}pae.png" + fig.write_image(image_path, width=800, height=800) + + return fig From 8cf6ebdd417c35827ffebe225a1d2ced2064e51d Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 11:08:34 +1100 Subject: [PATCH 02/43] A common generate_report that leverages plot_utils --- .../local/generate_report/generate_report.py | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 modules/local/generate_report/generate_report.py diff --git a/modules/local/generate_report/generate_report.py b/modules/local/generate_report/generate_report.py new file mode 100644 index 000000000..d314500da --- /dev/null +++ b/modules/local/generate_report/generate_report.py @@ -0,0 +1,179 @@ +from plot_utils import ( + reset_residue_numbers, + sort_structures_by_rank, + align_structures, + plddt_from_struct_b_factor, + generate_plddt_plot, + generate_pae_plot, + generate_sequence_coverage_plot, +) +import base64 +import argparse + +# TODO: Barcelona team to implement AF3, others +prog_name_mapping = { + "proteinfold": "ProteinFold", + "alphafold2": "AlphaFold2", + "esmfold": "ESMFold", + "colabfold": "ColabFold", + "rosettafold-all-atom": "RoseTTAFold-All-Atom", + "helixfold3": "HelixFold3", + "boltz1": "Boltz1", +} + +def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=None, pae_files=None, prog="ProteinFold", type="standard", html_template=None, write_htmls=True, seq_cov_as_html=False): + + # Change this to not just be ESMFold. HF3 resets on chainbreaks. Have structure res sequential just in case + for structure in structures: + structure = reset_residue_numbers(structure) + + # Sort structures by name and limit to set set number + if len(structures) > num_structs_limit: + print(f"Warning: More than {num_structs_limit} structures provided. Sorting and using only the first {num_structs_limit} structures.") + sorted_structures = sort_structures_by_rank(structures, prog) + structures = sorted_structures[:num_structs_limit] + + # Replace structures with aligned versions + if type == "comparison": + aligned_structures = align_structures(structures, save_ref_structure=True) + structures = aligned_structures + + # Keeping for parsing visibility purposes + print("Structures:", structures) + + #TODO: should really use a proper HTML parser for this, like BeautifulSoup or html5lib. strings prone to failure + #However, most replacements are simple and this is faster + template = open(html_template, "r").read() + template = template.replace("*sample_name*", name) + template = template.replace("*prog_name*", prog_name_mapping[prog]) + + lddt_averages = [] + for structure in structures: + lddt_averages.append(round(plddt_from_struct_b_factor(structure).mean(), 2)) + averages_js_array = f"const LDDT_AVERAGES = {lddt_averages};" + template = template.replace("const LDDT_AVERAGES = [];", averages_js_array) + + # Populate MODELS into the HTML templat + rank_names = [f"Rank {idx+1}" for idx, _ in enumerate(structures)] + model_names_js = ("const MODELS = [" + ",\n".join([f'"{model}"' for model in rank_names]) + "];") + template = template.replace("const MODELS = [];", model_names_js) + + # Populate MODELS_DATA with the content of the PDB files + # TODO: If the .cif string is written as a literal in the report, will it still render? Probably, not be see the logic + pdb_strings = [open(structure, "r").read().replace("\n", "\\n") for structure in structures] + models_data = ",\n".join([f'"{pdb_string}"' for pdb_string in pdb_strings]) + models_data_js = f"const MODELS_DATA = [{models_data}];" + template = template.replace("const MODELS_DATA = [];", models_data_js) + + # Generate sequence coverage plots and convert to HTML + if msa_files: + for msa_file in msa_files: + seq_cov_fig, seq_cov_img_path = generate_sequence_coverage_plot(msa_file, out_dir, name, save_image=True) + seq_cov_img_encoded = base64.b64encode(open(seq_cov_img_path, "rb").read()).decode("utf-8") + seq_cov_img_tag = f'Sequence Coverage Image' + + seq_cov_html = seq_cov_fig.to_html( + full_html=False, + include_plotlyjs="cdn", + config={"displayModeBar": True, "displaylogo": False, "scrollZoom": True}, + ) + if seq_cov_as_html == True: + template = template.replace('
', seq_cov_html) + else: + template = template.replace('
', seq_cov_img_tag) + + # Generate the pLDDT plot and convert to HTML + plddt_fig = generate_plddt_plot(structures) + plddt_html = plddt_fig.to_html( + full_html=False, + include_plotlyjs="cdn", + config={"displayModeBar": True, "displaylogo": False, "scrollZoom": True}, + ) + template = template.replace('
', plddt_html) + + #Generate PAE plot and conver to HTML TODO: currently onlt the first + if pae_files: + pae_figs = [] + for pae_file in pae_files: + # TODO: ensure PAE files are sorted and limited to num_structs_limit + pae_figs.append(generate_pae_plot(pae_file, out_dir, name, save_image=True)) + pae_html = pae_figs[0].to_html( + full_html=False, + include_plotlyjs="cdn", + config={"displayModeBar": True, "displaylogo": False, "scrollZoom": True}, + ) + template = template.replace('
', pae_html) + # TODO: need logic to keep PAEs in sync with structure upon click + # TODO: look at the Sequence coverage approach (e.g. ESMFold has none) + else: + pass + # TODO: Remove the PAE div if no PAE files are provided. + # The below approach will remove the div but needs dynamic resizing in the report + # pae_section_text = """ + #
PAE
+ #
+ #
+ #
+ #
+ #
+ # """ + # template = template.replace(pae_section_text.strip(), "") + + if write_htmls: + with open(f"{out_dir}/{name}_coverage_pLDDT.html", "w") as out_file: + out_file.write(plddt_html) + with open(f"{out_dir}/{name}_coverage_MSA.html", "w") as out_file: + out_file.write(seq_cov_html) + + # Write the final HTML report + with open(f"{out_dir}/{name}_{type}_report.html", "w") as out_file: + out_file.write(template) + +def main(): + parser = argparse.ArgumentParser(description="Generate protein structure reports.") + parser.add_argument("--name", required=True, help="Name of the report.") + parser.add_argument("--output_dir", required=True, help="Output directory for the report.") + parser.add_argument("--structs", required=True, nargs="+", help="List of structure file paths.") + parser.add_argument("--msa", nargs="+", default=None, help="MSA file path.") + parser.add_argument("--paes", nargs="+", default=None, help="List of PAE file paths (optional).") + parser.add_argument("--prog", default="proteinfold", choices=["alphafold2", "esmfold", "colabfold", "rosettafold-all-atom", "helixfold3", "boltz1"], type=str.lower, help="The program used to generate the structures, can be called in the workflow") + parser.add_argument("--type", default="standard", choices=["standard", "comparison"], help="The type of report file generated .") # TODO: change to --type with options in case there are other reports + #TODO: remove --html_template as this is already determined by the type + parser.add_argument("--html_template", default=None, help="Path to the HTML template for comparison (optional).") + parser.add_argument("--write_htmls", default=True, help="Write out seperate files for each html plot (optional).") + + args = parser.parse_args() + + print("Generating report.....") + + # TODO: want a better way of pathing this + if args.type == "comparison": + html_template = "../.../assets/comparison_template.html" + elif args.type == "standard": + html_template = "../../assets/report_template.html" + else: + html_template = args.html_template + + + # Both these values could be missing - EMSFold for MSA, many others for PAE + if os.path.basename(args.msa) == "NO_FILE": + args.peas=None + if os.path.basename(args.paes) == "NO_FILE": + args.peas=None + + generate_report( + name=args.name, + out_dir=args.output_dir, + structures=args.structs, + num_structs_limit=5, + msa_files=args.msa, + pae_files=args.paes, + prog=args.prog, + type=args.type, + html_template=html_template, + write_htmls=args.write_htmls, + seq_cov_as_html=False, + ) + +if __name__ == "__main__": + main() From 04f07f37e01b4cb2dc452c8b1f9a42533f5b2637 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 14:20:40 +1100 Subject: [PATCH 03/43] remove bs4 since HTML replace is so simple now --- assets/comparison_template.html | 820 ------------------ assets/report_template.html | 212 ++--- bin/generate_comparison_report.py | 324 ------- bin/generate_report.py | 636 ++++---------- bin/plot_utils.py | 309 +++++++ main.nf | 2 - .../local/compare_structures/environment.yml | 4 +- modules/local/compare_structures/main.nf | 8 +- modules/local/generate_report/environment.yml | 4 +- .../local/generate_report/generate_report.py | 202 ++--- modules/local/generate_report/main.nf | 11 +- modules/local/generate_report/plot_utils.py | 275 +++--- subworkflows/local/post_processing.nf | 72 +- 13 files changed, 826 insertions(+), 2053 deletions(-) delete mode 100644 assets/comparison_template.html delete mode 100755 bin/generate_comparison_report.py create mode 100644 bin/plot_utils.py diff --git a/assets/comparison_template.html b/assets/comparison_template.html deleted file mode 100644 index fffe4b133..000000000 --- a/assets/comparison_template.html +++ /dev/null @@ -1,820 +0,0 @@ - - - - - - - Protein structure comparison - - - - - - - - - - - - - - - -
- -
- -
- - - -
- -
- - - - - -
- -
- -
- -
-
Navigation
-
-
- Scroll up/down - to zoom in and out -
-
- Click + drag - to rotate the structure -
-
- CTRL + click + drag - to move the structure -
-
- Click - an atom to bring it into focus -
-
-
-
-
Display
-
- - -
-
-
-
- -
-
-
- -
-
    -
    - -
    -
    -
    Information
    -
    -
    Program: *prog_name*
    -
    ID: *sample_name*
    -
    - Average pLDDT: - -
    -
    -
    -
    -
    Download
    -
    - - -
    -
    -
    -
    -
    -
    pLDDT
    -
    -
    -
    -
    -
    -
    -
    Sequence Coverage
    -
    -
    - -
    - -
    -
    -
    - - - -
    -
    -

    - The Australian BioCommons - is supported by - Bioplatforms Australia -

    -

    - Bioplatforms Australia - is enabled by - NCRIS -

    -
    -
    -
    - - - diff --git a/assets/report_template.html b/assets/report_template.html index 48f644970..1179214cb 100644 --- a/assets/report_template.html +++ b/assets/report_template.html @@ -2,9 +2,8 @@ - - Protein structure prediction + Protein structure report ' + html = html.replace('', f'{config_script}\n', 1) + + # Generate sequence coverage plot from first MSA file + seq_cov_html = None if msa_files: - for msa_file in msa_files: - seq_cov_fig, seq_cov_img_path = generate_sequence_coverage_plot(msa_file, out_dir, name, save_image=True) - seq_cov_img_encoded = base64.b64encode(open(seq_cov_img_path, "rb").read()).decode("utf-8") - seq_cov_img_tag = f'Sequence Coverage Image' - - seq_cov_html = seq_cov_fig.to_html( - full_html=False, - include_plotlyjs="cdn", - config={"displayModeBar": True, "displaylogo": False, "scrollZoom": True}, - ) - if seq_cov_as_html == True: - template = template.replace('
    ', seq_cov_html) - else: - template = template.replace('
    ', seq_cov_img_tag) + seq_cov_fig = generate_sequence_coverage_plot(msa_files[0], out_dir, name) + seq_cov_html = seq_cov_fig.to_html( + full_html=False, + include_plotlyjs="cdn", + config=PLOTLY_CONFIG, + ) + + # Replace placeholder divs with plot HTML + if seq_cov_html: + html = html.replace('
    ', seq_cov_html, 1) # Generate the pLDDT plot and convert to HTML - plddt_fig = generate_plddt_plot(structures) + plddt_fig = generate_plddt_plot(parsed_structures) plddt_html = plddt_fig.to_html( full_html=False, include_plotlyjs="cdn", - config={"displayModeBar": True, "displaylogo": False, "scrollZoom": True}, + config=PLOTLY_CONFIG, ) - template = template.replace('
    ', plddt_html) + html = html.replace('
    ', plddt_html, 1) - #Generate PAE plot and conver to HTML TODO: currently onlt the first + # Generate PAE plot from first PAE file (TODO: toggle PAE with model selection) if pae_files: - pae_figs = [] - for pae_file in pae_files: - # TODO: ensure PAE files are sorted and limited to num_structs_limit - pae_figs.append(generate_pae_plot(pae_file, out_dir, name, save_image=True)) - pae_html = pae_figs[0].to_html( + pae_fig = generate_pae_plot(pae_files[0], out_dir, name) + pae_html = pae_fig.to_html( full_html=False, include_plotlyjs="cdn", - config={"displayModeBar": True, "displaylogo": False, "scrollZoom": True}, + config=PLOTLY_CONFIG, ) - template = template.replace('
    ', pae_html) - # TODO: need logic to keep PAEs in sync with structure upon click - # TODO: look at the Sequence coverage approach (e.g. ESMFold has none) - else: - pass - # TODO: Remove the PAE div if no PAE files are provided. - # The below approach will remove the div but needs dynamic resizing in the report - # pae_section_text = """ - #
    PAE
    - #
    - #
    - #
    - #
    - #
    - # """ - # template = template.replace(pae_section_text.strip(), "") + html = html.replace('
    ', pae_html, 1) if write_htmls: with open(f"{out_dir}/{name}_coverage_pLDDT.html", "w") as out_file: out_file.write(plddt_html) - with open(f"{out_dir}/{name}_coverage_MSA.html", "w") as out_file: - out_file.write(seq_cov_html) + if seq_cov_html: + with open(f"{out_dir}/{name}_coverage_MSA.html", "w") as out_file: + out_file.write(seq_cov_html) # Write the final HTML report with open(f"{out_dir}/{name}_{type}_report.html", "w") as out_file: - out_file.write(template) + out_file.write(html) def main(): parser = argparse.ArgumentParser(description="Generate protein structure reports.") parser.add_argument("--name", required=True, help="Name of the report.") parser.add_argument("--output_dir", required=True, help="Output directory for the report.") - parser.add_argument("--structs", required=True, nargs="+", help="List of structure file paths.") - parser.add_argument("--msa", nargs="+", default=None, help="MSA file path.") - parser.add_argument("--paes", nargs="+", default=None, help="List of PAE file paths (optional).") - parser.add_argument("--prog", default="proteinfold", choices=["alphafold2", "esmfold", "colabfold", "rosettafold-all-atom", "helixfold3", "boltz1"], type=str.lower, help="The program used to generate the structures, can be called in the workflow") - parser.add_argument("--type", default="standard", choices=["standard", "comparison"], help="The type of report file generated .") # TODO: change to --type with options in case there are other reports - #TODO: remove --html_template as this is already determined by the type - parser.add_argument("--html_template", default=None, help="Path to the HTML template for comparison (optional).") - parser.add_argument("--write_htmls", default=True, help="Write out seperate files for each html plot (optional).") + parser.add_argument("--structs", required=True, nargs="+", help="List of structure file paths (.pdb or .cif).") + parser.add_argument("--msa", nargs="+", default=None, help="MSA file path(s).") + parser.add_argument("--pae", nargs="+", default=None, help="PAE file path(s).") + parser.add_argument("--prog", default="proteinfold", choices=["proteinfold", "alphafold2", "alphafold3", "esmfold", "colabfold", "rosettafold-all-atom", "rosettafold2na", "helixfold3", "boltz", "comparison"], type=str.lower, help="The program used to generate the structures.") + parser.add_argument("--type", default="standard", choices=["standard", "comparison"], help="The type of report to generate.") + parser.add_argument("--html_template", default=None, help="Path to the HTML report template.") + parser.add_argument("--write_htmls", default=True, help="Write out separate files for each html plot.") args = parser.parse_args() print("Generating report.....") - # TODO: want a better way of pathing this - if args.type == "comparison": - html_template = "../.../assets/comparison_template.html" - elif args.type == "standard": - html_template = "../../assets/report_template.html" - else: - html_template = args.html_template - + html_template = args.html_template or get_template_path() - # Both these values could be missing - EMSFold for MSA, many others for PAE - if os.path.basename(args.msa) == "NO_FILE": - args.peas=None - if os.path.basename(args.paes) == "NO_FILE": - args.peas=None + # Both these values could be missing - ESMFold for MSA, many others for PAE + if args.msa and os.path.basename(args.msa[0]) == "NO_FILE": + args.msa = None + if args.pae and os.path.basename(args.pae[0]) == "NO_FILE": + args.pae = None generate_report( name=args.name, @@ -167,12 +158,11 @@ def main(): structures=args.structs, num_structs_limit=5, msa_files=args.msa, - pae_files=args.paes, + pae_files=args.pae, prog=args.prog, type=args.type, html_template=html_template, write_htmls=args.write_htmls, - seq_cov_as_html=False, ) if __name__ == "__main__": diff --git a/modules/local/generate_report/main.nf b/modules/local/generate_report/main.nf index f33599828..7d90ef821 100644 --- a/modules/local/generate_report/main.nf +++ b/modules/local/generate_report/main.nf @@ -14,7 +14,7 @@ process GENERATE_REPORT { output: tuple val(meta), path ("*report.html") , emit: report tuple val(meta), path ("*seq_coverage.png"), optional: true, emit: sequence_coverage - tuple val(meta), path ("*_LDDT.html") , emit: plddt + tuple val(meta), path ("*_pLDDT.html") , emit: plddt path "versions.yml" , emit: versions when: @@ -25,14 +25,15 @@ process GENERATE_REPORT { """ generate_report.py \\ - --type ${meta.model} \\ + --type standard \\ + --prog ${meta.model} \\ --msa ${msa} \\ --pae ${pae} \\ - --pdb ${pdb.join(' ')} \\ + --structs ${pdb.join(' ')} \\ --html_template ${template} \\ --output_dir ./ \\ --name ${meta.id} \\ - $args \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -45,7 +46,7 @@ process GENERATE_REPORT { """ touch test_alphafold2_report.html touch test_seq_coverage.png - touch test_LDDT.html + touch test_pLDDT.html cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/generate_report/plot_utils.py b/modules/local/generate_report/plot_utils.py index 6b56726bc..eaf9d02e4 100644 --- a/modules/local/generate_report/plot_utils.py +++ b/modules/local/generate_report/plot_utils.py @@ -1,65 +1,70 @@ -from collections import OrderedDict import plotly.graph_objects as go from Bio import PDB -import matplotlib.pyplot as plt + import numpy as np import os def reset_residue_numbers(structure): """ - Resets residue numbering in a PDB file, because ESMFold starts - and increment only when encountering a new residue. + Resets residue numbering in a PDB file, because ESMFold starts renumbering + at 1 for each chain and increments only when encountering a new residue. """ if str(structure).endswith(".pdb"): parser = PDB.PDBParser(QUIET=True) elif str(structure).endswith(".cif"): parser = PDB.MMCIFParser(QUIET=True) else: - print(f"{structure} is neither a PDB or mmCIF file!") - return - - structure = parser.get_structure("structure", structure) + raise ValueError(f"{structure} is neither a PDB or mmCIF file!") - for model in structure: - for idx, residue in enumerate(model.get_residues(), start=1): - # Do a swap in place to renumber the residue, the other entries in the tuple can stay the same - # See: https://biopython.org/docs/1.76/api/Bio.PDB.Chain.html#Bio.PDB.Chain.Chain.__getitem__ - het_atom, _, insertion_code = residue.get_id() - residue.id = (het_atom, idx, insertion_code) + struct_obj = parser.get_structure("structure", structure) - io = PDB.PDBIO() - io.set_structure(structure) + for model in struct_obj: + for chain in model: + for idx, residue in enumerate(chain.get_residues(), start=1): + # Do a swap in place to renumber the residue, the other entries in the tuple can stay the same + # See: https://biopython.org/docs/1.76/api/Bio.PDB.Chain.html#Bio.PDB.Chain.Chain.__getitem__ + het_atom, _, insertion_code = residue.get_id() + residue.id = (het_atom, idx, insertion_code) - return structure + return struct_obj # TODO: Barcelona team to implement AF3 def sort_structures_by_rank(structures, prog): """ - Sorts a list of structures based on their rank. Needs to handle different program naming + Sorts a list of structures based on their rank. Handles different program naming conventions. + + Returns: + List of structure files sorted by rank (always returns list, even for single structures) """ if prog == "alphafold2": # AlphaFold2 structures are named with [run]/ranked_[rank].pdb sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).replace('ranked_', '').split('.')[0])) - if prog == "colabfold": + elif prog == "colabfold": # ColabFold structures are named with [run]_unrelaxed_rank_[rank]_alphafold2_ptm_model_[num]_seed_[seed].pdb sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).split('_')[3])) - if prog == "helixfold3": + elif prog == "helixfold3": # HelixFold3 structures are named with .../[run]/[run]-rank[rank]/predicted_structure.pdb sorted_structures = sorted(structures, key=lambda x: int(os.path.dirname(x).split('rank')[-1])) - if prog == "esmfold" or "rosettafold-all-atom": + elif prog == "boltz": + # Boltz structures are named with ..._model_[diffusion_samples-1].[pdb|cif] + sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).split('_model_')[-1].split('.')[0])) + elif prog == "esmfold" or prog == "rosettafold-all-atom": # ESMFold and RoseTTAFold only produce one structure - sorted_structures = structures[0] - if prog == "boltz1": - # Boltz1 structures are named with ..._model_[diffusion_samples-1].[pdb|cif] - sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).split('_model_')[-1])) + sorted_structures = structures if isinstance(structures, list) else [structures] else: print(f"Warning: Sorting not implemented for {prog}. Using original order.") - return structures + sorted_structures = structures if isinstance(structures, list) else [structures] - return sorted_structures + return sorted_structures if isinstance(sorted_structures, list) else [sorted_structures] def align_structures(structures): - + """ + Align multiple structures against the first (reference) structure. + Uses common atoms for superimposition (handles cases where structures aren't complete). + + Returns: + List of BioPython structure objects aligned to the first structure + """ if not structures: raise ValueError("No structures provided for alignment.") @@ -68,7 +73,7 @@ def align_structures(structures): elif structures[0].endswith(".cif"): parser = PDB.MMCIFParser(QUIET=True) else: - raise ValueError(f"{structure} is neither a PDB or mmCIF file!") + raise ValueError(f"{structures[0]} is neither a PDB or mmCIF file!") parsed_structures = [parser.get_structure(f"structure-{idx}", structure) for idx, structure in enumerate(structures)] ref_structure = parsed_structures[0] @@ -77,76 +82,69 @@ def get_atom_ids(structure): # Note: this is a *set* of atom_ids due to the {} surrounding the comprehension return {(atom.get_parent().get_id(), atom.name) for atom in structure.get_atoms()} - # TODO: do we want to raise and error if the structures are not identical atomically, or keep the ability to sub-align? - # Update the atoms shared between structures with progressive intersections + # Find common atoms across all structures (progressive intersection) + # This allows alignment even if structures are incomplete or have different atom coverage common_atoms = get_atom_ids(ref_structure) for structure in parsed_structures[1:]: common_atoms.intersection_update(get_atom_ids(structure)) if not common_atoms: - raise ValueError("No common atoms found between structures.") + raise ValueError("No common atoms found between structures for alignment.") def extract_atoms(structure, atom_ids): - # Note: this comprehension returns an atom *object* for each atom in the structure - return {atom for atom in structure.get_atoms() if (atom.get_parent().get_id(), atom.name) in atom_ids} + # Must return a sorted list (not set) so ref/target atoms correspond positionally + atoms = [atom for atom in structure.get_atoms() if (atom.get_parent().get_id(), atom.name) in atom_ids] + return sorted(atoms, key=lambda a: (a.get_parent().get_id(), a.name)) ref_atoms = extract_atoms(ref_structure, common_atoms) # The aligned structures will be the parsed structures aligned to the common atoms of the reference structure super_imposer = PDB.Superimposer() - aligned_structures = [] - for idx, structure in enumerate(parsed_structures): - # The reference structure doesn't need to be aligned so can be skipped - if idx == 0: - aligned_structures.append(structure) - continue - + aligned_structures = [ref_structure] # Reference needs no alignment + for idx, structure in enumerate(parsed_structures[1:], start=1): target_atoms = extract_atoms(structure, common_atoms) - super_imposer.set_atoms(ref_atoms, target_atoms) + super_imposer.set_atoms(list(ref_atoms), list(target_atoms)) super_imposer.apply(structure.get_atoms()) - - io = PDB.PDBIO() - io.set_structure(structure) aligned_structures.append(structure) - # Technically, parsed_structures now also points to the same aligned structures, but I've kept for readability return aligned_structures def plddt_from_struct_b_factor(structure): """ - Uses the BioPython PDB package to extract residue pLDDT values from the b-factor column. Iterates over PDB objects rather than processes raw file + Extracts residue pLDDT values from the b-factor column using BioPython. + Accepts either a file path (str/Path) or a pre-parsed BioPython Structure object. """ - if str(structure).endswith(".pdb"): - parser = PDB.PDBParser(QUIET=True) - structure = parser.get_structure(id=id, file=structure) - elif str(structure).endswith(".cif"): - parser = PDB.MMCIFParser(QUIET=True) - structure = parser.get_structure(structure_id=id, filename=structure) + if isinstance(structure, (str, os.PathLike)): + if str(structure).endswith(".pdb"): + parser = PDB.PDBParser(QUIET=True) + elif str(structure).endswith(".cif"): + parser = PDB.MMCIFParser(QUIET=True) + else: + raise ValueError(f"{structure} is neither a PDB or mmCIF file!") + struct_obj = parser.get_structure(os.path.basename(str(structure)), str(structure)) else: - print(f"{structure} is neither a PDB or mmCIF file!") + # Already a BioPython structure object + struct_obj = structure - res_list = [] res_plddts = [] - plddt_tot = 0 - for model in structure: + for model in struct_obj: for chain in model: - chain_res_list = chain.get_unpacked_list() - res_list.extend(chain_res_list) for residue in chain: atom_list = residue.get_unpacked_list() atom_plddt_tot = 0 - for atom in residue: # ESMFold and others have separate atom-wise values, so doing atom-wise to cover that and residue-wise + # Handle both atom-wise and residue-wise pLDDT values + for atom in residue: atom_plddt = atom.get_bfactor() atom_plddt_tot += atom_plddt - res_plddt = float(atom_plddt_tot / len(atom_list)) + res_plddt = float(atom_plddt_tot / len(atom_list)) if atom_list else 0.0 - if (res_plddt < 1): # RFAA the multiplication of mean isn't failing. Anyway covering to a [0,100] range for any structure file1 + # Ensure values are in [0, 100] range + if res_plddt < 1: res_plddt *= 100 res_plddts.append(res_plddt) - plddt_tot += res_plddt res_plddts = np.array(res_plddts) res_plddts = np.round(res_plddts, 2) @@ -155,39 +153,39 @@ def plddt_from_struct_b_factor(structure): def generate_plddt_plot(structures): """ - Generate a Plotly figure for predicted LDDT per position for given structures. + Generate a Plotly figure for pLDDT per position for given structures. Args: - structures (list): List of structure file paths. + structures (list): List of structure file paths or BioPython structure objects. Returns: go.Figure: Plotly figure object with pLDDT data. """ - plddt_per_struct = OrderedDict() + plddt_per_struct = {} - for struct in structures: - plddt_per_struct[struct] = plddt_from_struct_b_factor(struct) + for idx, struct in enumerate(structures): + plddt_per_struct[f"rank-{idx}"] = plddt_from_struct_b_factor(struct) fig = go.Figure() - for idx, (struct, plddts) in enumerate(plddt_per_struct.items()): + for idx, (name, plddts) in enumerate(plddt_per_struct.items()): fig.add_trace( go.Scatter( x=list(range(len(plddts))), y=plddts, mode="lines", - name=f"rank-{idx}", - text=[f"({idx}, {value:.2f})" for idx, value in enumerate(plddts)], + name=name, + text=[f"({pos}, {value:.2f})" for pos, value in enumerate(plddts)], hoverinfo="text", ) ) fig.update_layout( - title=dict(text="Predicted LDDT per position", x=0.5, xanchor="center"), + title=dict(text="pLDDT per position", x=0.5, xanchor="center"), xaxis=dict( title="Positions", showline=True, linecolor="black", gridcolor="WhiteSmoke" ), yaxis=dict( - title="Predicted LDDT", + title="pLDDT", range=[0, 100], showline=True, linecolor="black", @@ -221,104 +219,91 @@ def process_msas(msa_path): return final_msas, non_gaps_msas -def generate_sequence_coverage_plot(msa_path, out_dir, name, save_image=True): +def generate_sequence_coverage_plot(msa_path, out_dir, name, save_image=False): + """ + Generate an interactive Plotly heatmap for sequence coverage with depth overlay. + """ final_msas, non_gaps_msas = process_msas(msa_path) - # seq_depth_counts = np.sum(~np.isnan(non_gaps_msas), axis=0) - # TODO: don't have a seperate save image plot and an HTML plotly ploy - # ################################################################## - # Plot the sequence coverage with matplotlib and save as image - # ################################################################## - if save_image: - image_path = f"{out_dir}/{name+('_' if name else '')}seq_coverage.png" - plt.figure(figsize=(14, 14), dpi=100) - plt.title("Sequence coverage", fontsize=30, pad=36) - plt.imshow( - final_msas, - interpolation="nearest", - aspect="auto", - cmap="rainbow_r", - vmin=0, - vmax=1, - origin="lower", - ) - - - plt.plot(seq_depth_counts, color="black") - plt.xlim(-0.5, len(final_msas[0]) - 0.5) - plt.ylim(-0.5, len(final_msas) - 0.5) - - plt.tick_params(axis="both", which="both", labelsize=18) - - cbar = plt.colorbar() - cbar.set_label("Sequence identity to query", fontsize=24, labelpad=24) - cbar.ax.tick_params(labelsize=18) - plt.xlabel("Positions", fontsize=24, labelpad=24) - plt.ylabel("Sequences", fontsize=24, labelpad=36) - plt.savefig(image_path) - - # ################################################################## - # Interactive HTML plot of sequence coverage - fig = go.Figure() - fig.add_trace( - go.Heatmap( - z=final_msas, - colorscale="Rainbow_r", - zmin=0, - zmax=1, - colorbar={"title": 'Your title'} - ) - ) - # Add black line for sequence coverage depth - fig.add_trace( - go.Scatter( - x=list(range(len(seq_depth_counts))), - y=seq_depth_counts, - mode="lines", - line=dict(color="black", width=2), - name="Coverage Depth", - ) + # Create interactive Plotly figure + fig = go.Figure() + + # Add heatmap for sequence coverage + fig.add_trace( + go.Heatmap( + z=final_msas, + colorscale="Rainbow_r", + zmin=0, + zmax=1, + colorbar={"title": "Sequence
    identity"}, + name="", ) - fig.update_layout( - title=dict(text="Sequence coverage", x=0.5, xanchor="center"), - xaxis_title="Positions", yaxis_title="Sequences", + ) + + # Add black line for sequence coverage depth as secondary trace + fig.add_trace( + go.Scatter( + x=list(range(len(seq_depth_counts))), + y=seq_depth_counts, + mode="lines", + line=dict(color="black", width=2), + name="Coverage Depth", + yaxis="y2", ) + ) + + # Update layout with dual y-axes + fig.update_layout( + title=dict(text="Sequence coverage", x=0.5, xanchor="center"), + xaxis_title="Positions", + yaxis_title="Sequences", + yaxis2=dict( + title="Coverage Depth", + overlaying="y", + side="right", + ), + width=800, + height=600, + ) if save_image: + image_path = f"{out_dir}/{name+('_' if name else '')}seq_coverage.png" + fig.write_image(image_path, width=800, height=600) return fig, image_path else: return fig -def generate_pae_plot(pae_path, out_dir, name, save_image=True): +def generate_pae_plot(pae_path, out_dir, name, save_image=False): """ - Generate a Plotly heatmap for Predicted Aligned Error (PAE) data. - - Args: - pae (2D array): The PAE matrix. - Returns: - fig: A Plotly figure object of the PAE heatmap in green color scale + Generate an interactive Plotly heatmap for Predicted Aligned Error (PAE) data. """ pae = np.genfromtxt(pae_path, delimiter="\t") max_pae = np.max(pae) fig = go.Figure() - # Add heatmap + # Add heatmap with green colorscale fig.add_trace( go.Heatmap( z=pae, colorscale="Greens_r", zmin=0, zmax=max_pae, + colorbar={"title": "PAE (Å)"}, ) ) + fig.update_layout( - xaxis=dict(title="Scored Residue"), - yaxis=dict(title="Aligned Residue"), + title=dict(text="Predicted Aligned Error", x=0.5, xanchor="center"), + xaxis=dict(title="Scored Residue"), + yaxis=dict(title="Aligned Residue"), + width=800, + height=800, ) if save_image: - image_path = f"{out_dir}/{name+('_' if name else '')}pae.png" - fig.write_image(image_path, width=800, height=800) - - return fig + image_path = f"{out_dir}/{name+('_' if name else '')}pae.png" + fig.write_image(image_path, width=800, height=800) + return fig, image_path + else: + return fig diff --git a/subworkflows/local/post_processing.nf b/subworkflows/local/post_processing.nf index d100361ce..0aaa2fe1d 100644 --- a/subworkflows/local/post_processing.nf +++ b/subworkflows/local/post_processing.nf @@ -23,7 +23,6 @@ workflow POST_PROCESSING { requested_modes_size ch_report_input ch_report_template - ch_comparison_template skip_foldseek foldseek_db foldseek_db_path @@ -48,48 +47,47 @@ workflow POST_PROCESSING { ch_versions = ch_versions.mix(GENERATE_REPORT.out.versions) if (requested_modes_size > 1){ - ch_dummy_file = channel.fromPath("$projectDir/assets/NO_FILE") - - def esm = ch_top_ranked_model.filter { it ->it[0].model == 'esmfold' } - def not_esm = ch_top_ranked_model.filter { it -> it[0].model != 'esmfold' } - - esm = esm - .map { it -> - [it[0], it[1]] - } - .merge(ch_dummy_file) - - not_esm = not_esm - .map { it -> [it[0], it[1]] } - .join(GENERATE_REPORT.out.sequence_coverage) - - not_esm.mix(esm).set{ch_comparison_report_files} - - ch_comparison_report_files - .map { it -> - [["id": it[0].id], it[0], it[1], it[2]] + // Multi-mode comparison: group structures and coverage data from all modes + ch_top_ranked_model + .map { meta, pdb -> + [["id": meta.id], meta, pdb] } + .join( + GENERATE_REPORT.out.sequence_coverage, + by: [0], + remainder: true // Include models without coverage (e.g., ESMFold) + ) .groupTuple(by: [0], size: requested_modes_size) - .map { it -> - it[0].models=it[1].join(','); - [it[0], it[2], it[3]] + .map { key, model_meta_list, coverage_list -> + key.models = model_meta_list.collect { meta, pdb -> meta.model }.join(',') + [key, model_meta_list.collect { meta, pdb -> pdb }, coverage_list] } .set { ch_comparison_report_input } + // Separate channel components for clarity + ch_comparison_report_input + .map { meta, structures, coverage -> + [meta, structures.collect { f -> f.name }] + } + .set { ch_pdb_input } + + ch_comparison_report_input + .map { meta, structures, coverage -> + [meta, coverage.findAll { f -> f != null }.collect { f -> f.name }] + } + .set { ch_msa_input } + + ch_comparison_report_input + .map { meta, structures, coverage -> + (structures + coverage.findAll { f -> f != null }).unique() + } + .set { ch_all_files } + COMPARE_STRUCTURES( - ch_comparison_report_input - .map { it -> - [it[0], it[1].collect { file -> file.name} ] - }, - ch_comparison_report_input - .map { it -> - [ it[0], it[2].collect { file -> file.name } ] - }, - ch_comparison_report_input - .map { it -> - (it[1] + it[2]).unique() - }, - ch_comparison_template + ch_pdb_input, + ch_msa_input, + ch_all_files, + ch_report_template ) ch_versions = ch_versions.mix(COMPARE_STRUCTURES.out.versions) } From 1839c30eda642d22be4b4e855d0fb72888c216d5 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 14:49:25 +1100 Subject: [PATCH 04/43] separate MSA and PAE DUMMY files to avoid clashes and check generically --- assets/{NO_FILE => DUMMY_MSA} | 0 assets/{NO_FILE_PAE => DUMMY_PAE} | 0 assets/report_template.html | 12 ++--- bin/generate_report.py | 49 +++++++------------ main.nf | 8 +-- .../local/generate_report/generate_report.py | 15 +++--- 6 files changed, 38 insertions(+), 46 deletions(-) rename assets/{NO_FILE => DUMMY_MSA} (100%) rename assets/{NO_FILE_PAE => DUMMY_PAE} (100%) mode change 100644 => 100755 modules/local/generate_report/generate_report.py diff --git a/assets/NO_FILE b/assets/DUMMY_MSA similarity index 100% rename from assets/NO_FILE rename to assets/DUMMY_MSA diff --git a/assets/NO_FILE_PAE b/assets/DUMMY_PAE similarity index 100% rename from assets/NO_FILE_PAE rename to assets/DUMMY_PAE diff --git a/assets/report_template.html b/assets/report_template.html index 1179214cb..ea80f9175 100644 --- a/assets/report_template.html +++ b/assets/report_template.html @@ -249,7 +249,7 @@
    Average pLDDT: - +
    @@ -396,7 +396,7 @@ Residue confidence - pLDDT
    -
    +
    @@ -508,7 +508,7 @@ structFormat: "pdb", models: [], models_data: [], - lddt_averages: [], + plddt_averages: [], }; // Load report configuration from embedded JSON @@ -524,7 +524,7 @@ const MODELS = config.models; const MODELS_DATA = config.models_data; - const LDDT_AVERAGES = config.lddt_averages; + const PLDDT_AVERAGES = config.plddt_averages; const PROGRAM_NAME = config.programName; const SAMPLE_NAME = config.sampleName; const STRUCT_FORMAT = config.structFormat; @@ -560,7 +560,7 @@ // Handle window resizing window.addEventListener("resize", () => stage.handleResize()); - document.getElementById("lddt-average").textContent = LDDT_AVERAGES[0]; + document.getElementById("plddt-average").textContent = PLDDT_AVERAGES[0]; loadModel(); loadModelImage(); @@ -581,7 +581,7 @@ const setModel = (ix) => { state.model = ix; - document.getElementById("lddt-average").textContent = LDDT_AVERAGES[ix]; + document.getElementById("plddt-average").textContent = PLDDT_AVERAGES[ix]; stage.removeComponent(state.modelObject); setLoading(1); diff --git a/bin/generate_report.py b/bin/generate_report.py index 8540cce89..51b737926 100755 --- a/bin/generate_report.py +++ b/bin/generate_report.py @@ -8,7 +8,6 @@ generate_pae_plot, generate_sequence_coverage_plot, ) -from bs4 import BeautifulSoup import json import argparse import os @@ -66,9 +65,9 @@ def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=No print("Structures:", structure_paths) - # Parse HTML template with BeautifulSoup + # Read HTML template with open(html_template, "r") as f: - soup = BeautifulSoup(f.read(), "html.parser") + html = f.read() # Build configuration JSON for JavaScript config = { @@ -77,23 +76,13 @@ def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=No "programName": prog_name_mapping.get(prog, prog), "structFormat": struct_format, "models": [f"Rank {idx+1}" for idx, _ in enumerate(parsed_structures)], - "lddt_averages": [round(plddt_from_struct_b_factor(s).mean(), 2) for s in parsed_structures], + "plddt_averages": [round(plddt_from_struct_b_factor(s).mean(), 2) for s in parsed_structures], "models_data": [open(s, "r").read().replace("\n", "\\n") for s in structure_paths], } - # Inject configuration as JSON into a script tag - config_script = soup.new_tag("script", type="application/json", attrs={"id": "report-config"}) - config_script.string = json.dumps(config) - - # Find or create head section and add config script - head = soup.find("head") - if head: - head.append(config_script) - else: - # Fallback: add before first script tag - first_script = soup.find("script") - if first_script: - first_script.insert_before(config_script) + # Inject configuration as a JSON script tag before + config_script = f'' + html = html.replace('', f'{config_script}\n', 1) # Generate sequence coverage plot from first MSA file seq_cov_html = None @@ -105,10 +94,9 @@ def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=No config=PLOTLY_CONFIG, ) - # Replace placeholder divs with content using BeautifulSoup - seq_cov_placeholder = soup.find("div", attrs={"id": "seq_cov_placeholder"}) - if seq_cov_placeholder and seq_cov_html: - seq_cov_placeholder.replace_with(BeautifulSoup(seq_cov_html, "html.parser")) + # Replace placeholder divs with plot HTML + if seq_cov_html: + html = html.replace('
    ', seq_cov_html, 1) # Generate the pLDDT plot and convert to HTML plddt_fig = generate_plddt_plot(parsed_structures) @@ -117,9 +105,7 @@ def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=No include_plotlyjs="cdn", config=PLOTLY_CONFIG, ) - lddt_placeholder = soup.find("div", attrs={"id": "lddt_placeholder"}) - if lddt_placeholder: - lddt_placeholder.replace_with(BeautifulSoup(plddt_html, "html.parser")) + html = html.replace('
    ', plddt_html, 1) # Generate PAE plot from first PAE file (TODO: toggle PAE with model selection) if pae_files: @@ -129,9 +115,7 @@ def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=No include_plotlyjs="cdn", config=PLOTLY_CONFIG, ) - pae_placeholder = soup.find("div", attrs={"id": "pae_placeholder"}) - if pae_placeholder: - pae_placeholder.replace_with(BeautifulSoup(pae_html, "html.parser")) + html = html.replace('
    ', pae_html, 1) if write_htmls: with open(f"{out_dir}/{name}_coverage_pLDDT.html", "w") as out_file: @@ -142,7 +126,7 @@ def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=No # Write the final HTML report with open(f"{out_dir}/{name}_{type}_report.html", "w") as out_file: - out_file.write(str(soup)) + out_file.write(html) def main(): parser = argparse.ArgumentParser(description="Generate protein structure reports.") @@ -163,10 +147,15 @@ def main(): html_template = args.html_template or get_template_path() # Both these values could be missing - ESMFold for MSA, many others for PAE - if args.msa and os.path.basename(args.msa[0]) == "NO_FILE": + if args.msa and os.path.basename(args.msa[0]).startswith("DUMMY_"): args.msa = None - if args.pae and os.path.basename(args.pae[0]) == "NO_FILE": + if args.pae and os.path.basename(args.pae[0]).startswith("DUMMY_"): args.pae = None + # Catch-all for any future optional metric args, if we have plots for pTM or other missing values. The above two are more common and explicit + for attr in vars(args): + val = getattr(args, attr) + if isinstance(val, list) and val and os.path.basename(val[0]).startswith("DUMMY_"): + setattr(args, attr, None) generate_report( name=args.name, diff --git a/main.nf b/main.nf index ccbadd9a6..b8c235772 100644 --- a/main.nf +++ b/main.nf @@ -74,8 +74,8 @@ workflow NFCORE_PROTEINFOLD { requested_modes = params.mode.toLowerCase().split(",") requested_modes_size = requested_modes.size() - ch_dummy_file = channel.fromPath("$projectDir/assets/NO_FILE") - ch_dummy_file_pae = channel.fromPath("$projectDir/assets/NO_FILE_PAE") + ch_dummy_msa = channel.fromPath("$projectDir/assets/DUMMY_MSA") + ch_dummy_pae = channel.fromPath("$projectDir/assets/DUMMY_PAE") // // WORKFLOW: Run alphafold2 @@ -324,8 +324,8 @@ workflow NFCORE_PROTEINFOLD { ch_versions = ch_versions.mix(ESMFOLD.out.versions) ch_report_input = ch_report_input.mix( ESMFOLD.out.pdb - .combine(ch_dummy_file) - .combine(ch_dummy_file_pae) + .combine(ch_dummy_msa) + .combine(ch_dummy_pae) ) ch_top_ranked_model = ch_top_ranked_model.mix(ESMFOLD.out.pdb) } diff --git a/modules/local/generate_report/generate_report.py b/modules/local/generate_report/generate_report.py old mode 100644 new mode 100755 index e151c9030..86dccd315 --- a/modules/local/generate_report/generate_report.py +++ b/modules/local/generate_report/generate_report.py @@ -76,7 +76,7 @@ def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=No "programName": prog_name_mapping.get(prog, prog), "structFormat": struct_format, "models": [f"Rank {idx+1}" for idx, _ in enumerate(parsed_structures)], - "lddt_averages": [round(plddt_from_struct_b_factor(s).mean(), 2) for s in parsed_structures], + "plddt_averages": [round(plddt_from_struct_b_factor(s).mean(), 2) for s in parsed_structures], "models_data": [open(s, "r").read().replace("\n", "\\n") for s in structure_paths], } @@ -105,7 +105,7 @@ def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=No include_plotlyjs="cdn", config=PLOTLY_CONFIG, ) - html = html.replace('
    ', plddt_html, 1) + html = html.replace('
    ', plddt_html, 1) # Generate PAE plot from first PAE file (TODO: toggle PAE with model selection) if pae_files: @@ -147,11 +147,14 @@ def main(): html_template = args.html_template or get_template_path() # Both these values could be missing - ESMFold for MSA, many others for PAE - if args.msa and os.path.basename(args.msa[0]) == "NO_FILE": + if args.msa and os.path.basename(args.msa[0]).startswith("DUMMY_MSA"): args.msa = None - if args.pae and os.path.basename(args.pae[0]) == "NO_FILE": - args.pae = None - + if args.pae and os.path.basename(args.pae[0]).startswith("DUMMY_PAE"): + args.pae = None # Catch-all for any future optional metric args + for attr in vars(args): + val = getattr(args, attr) + if isinstance(val, list) and val and os.path.basename(val[0]).startswith("DUMMY_"): + setattr(args, attr, None) generate_report( name=args.name, out_dir=args.output_dir, From 3570829bb81656cce4c8a547fefc9d58a2fbc77e Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 14:59:11 +1100 Subject: [PATCH 05/43] remove matcher because sort_structures_by_rank() in plot_utils.py handles this more robusts on a per-program basis, can be source of truth --- main.nf | 63 ++------------------------- subworkflows/local/post_processing.nf | 2 +- 2 files changed, 5 insertions(+), 60 deletions(-) diff --git a/main.nf b/main.nf index b8c235772..f5bd0b92a 100644 --- a/main.nf +++ b/main.nf @@ -139,22 +139,7 @@ workflow NFCORE_PROTEINFOLD { ch_multiqc = ch_multiqc.mix(ALPHAFOLD2.out.multiqc_report.collect()) ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) ch_report_input = ch_report_input - .mix(ALPHAFOLD2 - .out - .pdb - .map { it -> - [ it[0], - it[1].sort { path -> - def filename = path.name - def matcher = filename =~ /ranked_(\d+)\.pdb/ - if (matcher.matches()) { - return matcher[0][1].toInteger() - } else { - return 0 // fallback if no match - } - }.subList(0, Math.min(5, it[1].size() as int)) - ] - } + .mix(ALPHAFOLD2.out.pdb .join(ALPHAFOLD2.out.msa) .join(ALPHAFOLD2.out.pae) ) @@ -213,23 +198,7 @@ workflow NFCORE_PROTEINFOLD { ch_versions = ch_versions.mix(ALPHAFOLD3.out.versions) ch_report_input = ch_report_input .mix( - ALPHAFOLD3 - .out - .pdb - .map { it -> - [ - it[0], - it[1].sort { path -> - def filename = path.name - def matcher = filename =~ /.*_ranked_(\d+)\.pdb/ - if (matcher.matches()) { - return matcher[0][1].toInteger() - } else { - return 0 // fallback if no match - } - }.subList(0, Math.min(5, it[1].size() as int)) - ] - } + ALPHAFOLD3.out.pdb .join(ALPHAFOLD3.out.msa) .join(ALPHAFOLD3.out.pae) ) @@ -273,19 +242,7 @@ workflow NFCORE_PROTEINFOLD { ch_multiqc = ch_multiqc.mix(COLABFOLD.out.multiqc_report) ch_versions = ch_versions.mix(COLABFOLD.out.versions) ch_report_input = ch_report_input - .mix(COLABFOLD.out.pdb.map { it -> - [ it[0], - it[1].sort { path -> - def filename = path.name - def matcher = filename =~ /_relaxed_rank_(\d+)\.pdb/ - if (matcher.matches()) { - return matcher[0][1].toInteger() - } else { - return 0 // fallback if no match - } - }.subList(0, Math.min(5, it[1].size() as int)) - ] - } + .mix(COLABFOLD.out.pdb .join(COLABFOLD.out.msa) .join(COLABFOLD.out.pae) ) @@ -437,19 +394,7 @@ workflow NFCORE_PROTEINFOLD { ch_multiqc = ch_multiqc.mix(HELIXFOLD3.out.multiqc_report.collect()) ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) ch_report_input = ch_report_input - .mix(HELIXFOLD3.out.pdb.map { it -> - [ it[0], - it[1].sort { path -> - def filename = path.name - def matcher = filename =~ /ranked_(\d+)\.pdb/ - if (matcher.matches()) { - return matcher[0][1].toInteger() - } else { - return 0 // fallback if no match - } - }.subList(0, Math.min(5, it[1].size() as int)) - ] - } + .mix(HELIXFOLD3.out.pdb .join(HELIXFOLD3.out.msa) .join(HELIXFOLD3.out.pae) ) diff --git a/subworkflows/local/post_processing.nf b/subworkflows/local/post_processing.nf index 0aaa2fe1d..0db03e5b3 100644 --- a/subworkflows/local/post_processing.nf +++ b/subworkflows/local/post_processing.nf @@ -139,7 +139,7 @@ workflow POST_PROCESSING { .collect() .map { it -> [it] } ) - .map { it -> [ it[0], it[1] + it[2] ] }, + .map { meta, report_files, multiqc_files -> [ meta, report_files + multiqc_files ] }, ch_multiqc_config, ch_multiqc_custom_config .collect() From 77af3e3a8e9b7e52908a0dd9dd6f5a87afcc7035 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 15:11:16 +1100 Subject: [PATCH 06/43] remove hard to read inconsistent whitespace in channel construction --- main.nf | 88 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/main.nf b/main.nf index f5bd0b92a..361bf34e9 100644 --- a/main.nf +++ b/main.nf @@ -136,10 +136,11 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, PREPARE_ALPHAFOLD2_DBS.out.uniprot ) - ch_multiqc = ch_multiqc.mix(ALPHAFOLD2.out.multiqc_report.collect()) - ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) - ch_report_input = ch_report_input - .mix(ALPHAFOLD2.out.pdb + ch_multiqc = ch_multiqc.mix(ALPHAFOLD2.out.multiqc_report.collect()) + ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) + ch_report_input = ch_report_input + .mix( + ALPHAFOLD2.out.pdb .join(ALPHAFOLD2.out.msa) .join(ALPHAFOLD2.out.pae) ) @@ -241,8 +242,9 @@ workflow NFCORE_PROTEINFOLD { ch_multiqc = ch_multiqc.mix(COLABFOLD.out.multiqc_report) ch_versions = ch_versions.mix(COLABFOLD.out.versions) - ch_report_input = ch_report_input - .mix(COLABFOLD.out.pdb + ch_report_input = ch_report_input + .mix( + COLABFOLD.out.pdb .join(COLABFOLD.out.msa) .join(COLABFOLD.out.pae) ) @@ -279,11 +281,12 @@ workflow NFCORE_PROTEINFOLD { ch_multiqc = ch_multiqc.mix(ESMFOLD.out.multiqc_report.collect()) ch_versions = ch_versions.mix(ESMFOLD.out.versions) - ch_report_input = ch_report_input.mix( - ESMFOLD.out.pdb - .combine(ch_dummy_msa) - .combine(ch_dummy_pae) - ) + ch_report_input = ch_report_input + .mix( + ESMFOLD.out.pdb.map { meta, pdb -> [meta, [pdb]] } + .combine(ch_dummy_msa) + .combine(ch_dummy_pae) + ) ch_top_ranked_model = ch_top_ranked_model.mix(ESMFOLD.out.pdb) } @@ -321,13 +324,15 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.rfaa_paper_weights ) - ch_multiqc = ch_multiqc.mix(ROSETTAFOLD_ALL_ATOM.out.multiqc_report.collect()) - ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) - ch_report_input = ch_report_input.mix(ROSETTAFOLD_ALL_ATOM.out.pdb - .join(ROSETTAFOLD_ALL_ATOM.out.msa) - .join(ROSETTAFOLD_ALL_ATOM.out.pae) - ) - ch_top_ranked_model = ch_top_ranked_model.mix(ROSETTAFOLD_ALL_ATOM.out.pdb) + ch_multiqc = ch_multiqc.mix(ROSETTAFOLD_ALL_ATOM.out.multiqc_report.collect()) + ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) + ch_report_input = ch_report_input + .mix( + ROSETTAFOLD_ALL_ATOM.out.pdb.map { meta, pdb -> [meta, [pdb]] } + .join(ROSETTAFOLD_ALL_ATOM.out.msa) + .join(ROSETTAFOLD_ALL_ATOM.out.pae) + ) + ch_top_ranked_model = ch_top_ranked_model.mix(ROSETTAFOLD_ALL_ATOM.out.pdb) } // @@ -391,10 +396,11 @@ workflow NFCORE_PROTEINFOLD { PREPARE_HELIXFOLD3_DBS.out.helixfold3_init_models, PREPARE_HELIXFOLD3_DBS.out.helixfold3_maxit_src ) - ch_multiqc = ch_multiqc.mix(HELIXFOLD3.out.multiqc_report.collect()) - ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) - ch_report_input = ch_report_input - .mix(HELIXFOLD3.out.pdb + ch_multiqc = ch_multiqc.mix(HELIXFOLD3.out.multiqc_report.collect()) + ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) + ch_report_input = ch_report_input + .mix( + HELIXFOLD3.out.pdb .join(HELIXFOLD3.out.msa) .join(HELIXFOLD3.out.pae) ) @@ -440,18 +446,15 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ROSETTAFOLD2NA_DBS.out.rna, PREPARE_ROSETTAFOLD2NA_DBS.out.rosettafold2na_weights ) - ch_multiqc = ch_multiqc.mix(ROSETTAFOLD2NA.out.multiqc_report.collect()) - ch_versions = ch_versions.mix(ROSETTAFOLD2NA.out.versions) - ch_report_input = ch_report_input - .mix( - ROSETTAFOLD2NA - .out - .pdb - .map { meta, pdb -> [ meta, [ pdb ] ] } - .join(ROSETTAFOLD2NA.out.msa) - .join(ROSETTAFOLD2NA.out.pae) - ) - ch_top_ranked_model = ch_top_ranked_model.mix(ROSETTAFOLD2NA.out.pdb) + ch_multiqc = ch_multiqc.mix(ROSETTAFOLD2NA.out.multiqc_report.collect()) + ch_versions = ch_versions.mix(ROSETTAFOLD2NA.out.versions) + ch_report_input = ch_report_input + .mix( + ROSETTAFOLD2NA.out.pdb.map { meta, pdb -> [meta, [pdb]] } + .join(ROSETTAFOLD2NA.out.msa) + .join(ROSETTAFOLD2NA.out.pae) + ) + ch_top_ranked_model = ch_top_ranked_model.mix(ROSETTAFOLD2NA.out.pdb) } // WORKFLOW: Run Boltz @@ -498,14 +501,15 @@ workflow NFCORE_PROTEINFOLD { PREPARE_COLABFOLD_DBS_BOLTZ.out.uniref30, params.use_msa_server ) - ch_multiqc = ch_multiqc.mix(BOLTZ.out.multiqc_report) - ch_versions = ch_versions.mix(BOLTZ.out.versions) - ch_report_input = ch_report_input.mix( - BOLTZ.out.pdb - .join(BOLTZ.out.msa) - .join(BOLTZ.out.pae) - ) - ch_top_ranked_model = ch_top_ranked_model.mix(BOLTZ.out.top_ranked_pdb) + ch_multiqc = ch_multiqc.mix(BOLTZ.out.multiqc_report) + ch_versions = ch_versions.mix(BOLTZ.out.versions) + ch_report_input = ch_report_input + .mix( + BOLTZ.out.pdb + .join(BOLTZ.out.msa) + .join(BOLTZ.out.pae) + ) + ch_top_ranked_model = ch_top_ranked_model.mix(BOLTZ.out.top_ranked_pdb) } // // POST PROCESSING: generate visualisation reports From 3b28bedcc8a099d2e59d561182d7cb464c123183 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 15:14:37 +1100 Subject: [PATCH 07/43] remove blank divs in the report is the metrics aren't there --- assets/report_template.html | 4 + bin/generate_report.py | 7 +- .../local/generate_report/generate_report.py | 172 ------------------ 3 files changed, 10 insertions(+), 173 deletions(-) delete mode 100755 modules/local/generate_report/generate_report.py diff --git a/assets/report_template.html b/assets/report_template.html index ea80f9175..e94d8369e 100644 --- a/assets/report_template.html +++ b/assets/report_template.html @@ -378,6 +378,7 @@
    +
    @@ -388,6 +389,7 @@
    +
    @@ -400,6 +402,7 @@
    +
    Residue-pair alignment error - PAE @@ -408,6 +411,7 @@
    + diff --git a/bin/generate_report.py b/bin/generate_report.py index 51b737926..ddbca4fb6 100755 --- a/bin/generate_report.py +++ b/bin/generate_report.py @@ -11,6 +11,7 @@ import json import argparse import os +import re from pathlib import Path prog_name_mapping = { @@ -94,9 +95,11 @@ def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=No config=PLOTLY_CONFIG, ) - # Replace placeholder divs with plot HTML + # Replace or remove optional sections if seq_cov_html: html = html.replace('
    ', seq_cov_html, 1) + else: + html = re.sub(r'.*?', '', html, flags=re.DOTALL) # Generate the pLDDT plot and convert to HTML plddt_fig = generate_plddt_plot(parsed_structures) @@ -116,6 +119,8 @@ def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=No config=PLOTLY_CONFIG, ) html = html.replace('
    ', pae_html, 1) + else: + html = re.sub(r'.*?', '', html, flags=re.DOTALL) if write_htmls: with open(f"{out_dir}/{name}_coverage_pLDDT.html", "w") as out_file: diff --git a/modules/local/generate_report/generate_report.py b/modules/local/generate_report/generate_report.py deleted file mode 100755 index 86dccd315..000000000 --- a/modules/local/generate_report/generate_report.py +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env python3 -from plot_utils import ( - reset_residue_numbers, - sort_structures_by_rank, - align_structures, - plddt_from_struct_b_factor, - generate_plddt_plot, - generate_pae_plot, - generate_sequence_coverage_plot, -) -import json -import argparse -import os -from pathlib import Path - -prog_name_mapping = { - "proteinfold": "ProteinFold", - "alphafold2": "AlphaFold2", - "alphafold3": "AlphaFold3", - "esmfold": "ESMFold", - "colabfold": "ColabFold", - "rosettafold-all-atom": "RoseTTAFold-All-Atom", - "rosettafold2na": "RoseTTAFold2NA", - "helixfold3": "HelixFold3", - "boltz": "Boltz", - "comparison": "Comparison", -} - -def get_template_path(): - # Get directory where this script lives: modules/local/generate_report/ - script_dir = Path(__file__).parent.parent.parent # Go up to modules/local/ - template_path = script_dir / "assets" / "report_template.html" - - if not template_path.exists(): - raise FileNotFoundError( - f"Template not found: {template_path}\n" - f"Expected: {script_dir}/assets/report_template.html" - ) - - return str(template_path) - -def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=None, pae_files=None, prog="proteinfold", type="standard", html_template=None, write_htmls=True): - - PLOTLY_CONFIG = {"displayModeBar": True, "displaylogo": False, "scrollZoom": True} - - # Sort structures by name and limit to set number - if len(structures) > num_structs_limit: - print(f"Warning: More than {num_structs_limit} structures provided. Sorting and using only the first {num_structs_limit} structures.") - sorted_structures = sort_structures_by_rank(structures, prog) - structures = sorted_structures[:num_structs_limit] - - # Keep original file paths for reading structure data and NGL viewer - structure_paths = list(structures) - - # Detect structure format for NGL viewer - struct_format = "cif" if structure_paths[0].endswith(".cif") else "pdb" - - # Parse structures into BioPython objects with sequential residue numbering - # (ESMFold, HF3 etc. restart numbering per chain — renumber to be sequential) - parsed_structures = [reset_residue_numbers(s) for s in structure_paths] - - # For comparison mode, re-parse and align structures - if type == "comparison": - parsed_structures = align_structures(structure_paths) - - print("Structures:", structure_paths) - - # Read HTML template - with open(html_template, "r") as f: - html = f.read() - - # Build configuration JSON for JavaScript - config = { - "reportType": type, - "sampleName": name, - "programName": prog_name_mapping.get(prog, prog), - "structFormat": struct_format, - "models": [f"Rank {idx+1}" for idx, _ in enumerate(parsed_structures)], - "plddt_averages": [round(plddt_from_struct_b_factor(s).mean(), 2) for s in parsed_structures], - "models_data": [open(s, "r").read().replace("\n", "\\n") for s in structure_paths], - } - - # Inject configuration as a JSON script tag before - config_script = f'' - html = html.replace('', f'{config_script}\n', 1) - - # Generate sequence coverage plot from first MSA file - seq_cov_html = None - if msa_files: - seq_cov_fig = generate_sequence_coverage_plot(msa_files[0], out_dir, name) - seq_cov_html = seq_cov_fig.to_html( - full_html=False, - include_plotlyjs="cdn", - config=PLOTLY_CONFIG, - ) - - # Replace placeholder divs with plot HTML - if seq_cov_html: - html = html.replace('
    ', seq_cov_html, 1) - - # Generate the pLDDT plot and convert to HTML - plddt_fig = generate_plddt_plot(parsed_structures) - plddt_html = plddt_fig.to_html( - full_html=False, - include_plotlyjs="cdn", - config=PLOTLY_CONFIG, - ) - html = html.replace('
    ', plddt_html, 1) - - # Generate PAE plot from first PAE file (TODO: toggle PAE with model selection) - if pae_files: - pae_fig = generate_pae_plot(pae_files[0], out_dir, name) - pae_html = pae_fig.to_html( - full_html=False, - include_plotlyjs="cdn", - config=PLOTLY_CONFIG, - ) - html = html.replace('
    ', pae_html, 1) - - if write_htmls: - with open(f"{out_dir}/{name}_coverage_pLDDT.html", "w") as out_file: - out_file.write(plddt_html) - if seq_cov_html: - with open(f"{out_dir}/{name}_coverage_MSA.html", "w") as out_file: - out_file.write(seq_cov_html) - - # Write the final HTML report - with open(f"{out_dir}/{name}_{type}_report.html", "w") as out_file: - out_file.write(html) - -def main(): - parser = argparse.ArgumentParser(description="Generate protein structure reports.") - parser.add_argument("--name", required=True, help="Name of the report.") - parser.add_argument("--output_dir", required=True, help="Output directory for the report.") - parser.add_argument("--structs", required=True, nargs="+", help="List of structure file paths (.pdb or .cif).") - parser.add_argument("--msa", nargs="+", default=None, help="MSA file path(s).") - parser.add_argument("--pae", nargs="+", default=None, help="PAE file path(s).") - parser.add_argument("--prog", default="proteinfold", choices=["proteinfold", "alphafold2", "alphafold3", "esmfold", "colabfold", "rosettafold-all-atom", "rosettafold2na", "helixfold3", "boltz", "comparison"], type=str.lower, help="The program used to generate the structures.") - parser.add_argument("--type", default="standard", choices=["standard", "comparison"], help="The type of report to generate.") - parser.add_argument("--html_template", default=None, help="Path to the HTML report template.") - parser.add_argument("--write_htmls", default=True, help="Write out separate files for each html plot.") - - args = parser.parse_args() - - print("Generating report.....") - - html_template = args.html_template or get_template_path() - - # Both these values could be missing - ESMFold for MSA, many others for PAE - if args.msa and os.path.basename(args.msa[0]).startswith("DUMMY_MSA"): - args.msa = None - if args.pae and os.path.basename(args.pae[0]).startswith("DUMMY_PAE"): - args.pae = None # Catch-all for any future optional metric args - for attr in vars(args): - val = getattr(args, attr) - if isinstance(val, list) and val and os.path.basename(val[0]).startswith("DUMMY_"): - setattr(args, attr, None) - generate_report( - name=args.name, - out_dir=args.output_dir, - structures=args.structs, - num_structs_limit=5, - msa_files=args.msa, - pae_files=args.pae, - prog=args.prog, - type=args.type, - html_template=html_template, - write_htmls=args.write_htmls, - ) - -if __name__ == "__main__": - main() From a22ab62cc219dbc9e8a68d3ee87522c0d685815c Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 15:26:53 +1100 Subject: [PATCH 08/43] simplify post_processing by not having a seq_coverage.png --- bin/generate_report.py | 11 +--- modules/local/generate_report/main.nf | 4 -- subworkflows/local/post_processing.nf | 75 +++++++++++---------------- 3 files changed, 31 insertions(+), 59 deletions(-) diff --git a/bin/generate_report.py b/bin/generate_report.py index ddbca4fb6..7decfe515 100755 --- a/bin/generate_report.py +++ b/bin/generate_report.py @@ -40,7 +40,7 @@ def get_template_path(): return str(template_path) -def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=None, pae_files=None, prog="proteinfold", type="standard", html_template=None, write_htmls=True): +def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=None, pae_files=None, prog="proteinfold", type="standard", html_template=None): PLOTLY_CONFIG = {"displayModeBar": True, "displaylogo": False, "scrollZoom": True} @@ -122,13 +122,6 @@ def generate_report(name, out_dir, structures, num_structs_limit=5, msa_files=No else: html = re.sub(r'.*?', '', html, flags=re.DOTALL) - if write_htmls: - with open(f"{out_dir}/{name}_coverage_pLDDT.html", "w") as out_file: - out_file.write(plddt_html) - if seq_cov_html: - with open(f"{out_dir}/{name}_coverage_MSA.html", "w") as out_file: - out_file.write(seq_cov_html) - # Write the final HTML report with open(f"{out_dir}/{name}_{type}_report.html", "w") as out_file: out_file.write(html) @@ -143,7 +136,6 @@ def main(): parser.add_argument("--prog", default="proteinfold", choices=["proteinfold", "alphafold2", "alphafold3", "esmfold", "colabfold", "rosettafold-all-atom", "rosettafold2na", "helixfold3", "boltz", "comparison"], type=str.lower, help="The program used to generate the structures.") parser.add_argument("--type", default="standard", choices=["standard", "comparison"], help="The type of report to generate.") parser.add_argument("--html_template", default=None, help="Path to the HTML report template.") - parser.add_argument("--write_htmls", default=True, help="Write out separate files for each html plot.") args = parser.parse_args() @@ -172,7 +164,6 @@ def main(): prog=args.prog, type=args.type, html_template=html_template, - write_htmls=args.write_htmls, ) if __name__ == "__main__": diff --git a/modules/local/generate_report/main.nf b/modules/local/generate_report/main.nf index 7d90ef821..f5d5a70c3 100644 --- a/modules/local/generate_report/main.nf +++ b/modules/local/generate_report/main.nf @@ -13,8 +13,6 @@ process GENERATE_REPORT { output: tuple val(meta), path ("*report.html") , emit: report - tuple val(meta), path ("*seq_coverage.png"), optional: true, emit: sequence_coverage - tuple val(meta), path ("*_pLDDT.html") , emit: plddt path "versions.yml" , emit: versions when: @@ -45,8 +43,6 @@ process GENERATE_REPORT { stub: """ touch test_alphafold2_report.html - touch test_seq_coverage.png - touch test_pLDDT.html cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/post_processing.nf b/subworkflows/local/post_processing.nf index 0db03e5b3..119de6aea 100644 --- a/subworkflows/local/post_processing.nf +++ b/subworkflows/local/post_processing.nf @@ -40,53 +40,43 @@ workflow POST_PROCESSING { ch_comparison_report_files = channel.empty() if (!skip_visualisation){ + ch_report_input + .multiMap { meta, pdbs, msa, pae -> + full: [meta, pdbs, msa, pae] + msa_only: [meta, msa] + } + .set { ch_report_split } + GENERATE_REPORT( - ch_report_input, + ch_report_split.full, ch_report_template ) ch_versions = ch_versions.mix(GENERATE_REPORT.out.versions) if (requested_modes_size > 1){ - // Multi-mode comparison: group structures and coverage data from all modes + // Multi-mode comparison: group top-ranked structures and MSA data from all modes ch_top_ranked_model - .map { meta, pdb -> - [["id": meta.id], meta, pdb] + .join(ch_report_split.msa_only) + .map { meta, pdb, msa -> + [["id": meta.id], meta, pdb, msa] } - .join( - GENERATE_REPORT.out.sequence_coverage, - by: [0], - remainder: true // Include models without coverage (e.g., ESMFold) - ) .groupTuple(by: [0], size: requested_modes_size) - .map { key, model_meta_list, coverage_list -> - key.models = model_meta_list.collect { meta, pdb -> meta.model }.join(',') - [key, model_meta_list.collect { meta, pdb -> pdb }, coverage_list] - } - .set { ch_comparison_report_input } - - // Separate channel components for clarity - ch_comparison_report_input - .map { meta, structures, coverage -> - [meta, structures.collect { f -> f.name }] + .map { key, model_meta_list, pdbs, msas -> + def models_str = model_meta_list.collect { it.model }.join(',') + [key + [models: models_str], pdbs, msas] } - .set { ch_pdb_input } - - ch_comparison_report_input - .map { meta, structures, coverage -> - [meta, coverage.findAll { f -> f != null }.collect { f -> f.name }] - } - .set { ch_msa_input } - - ch_comparison_report_input - .map { meta, structures, coverage -> - (structures + coverage.findAll { f -> f != null }).unique() + .multiMap { meta, pdbs, msas -> + def valid_msas = msas.findAll { !it.name.startsWith("DUMMY_") } + pdbs: [meta, pdbs.collect { it.name }] + msas: [meta, valid_msas.collect { it.name }] + allfiles: (pdbs + valid_msas).unique() } - .set { ch_all_files } + .set { ch_split } COMPARE_STRUCTURES( - ch_pdb_input, - ch_msa_input, - ch_all_files, + ch_split.pdbs, + ch_split.msas, + ch_split.allfiles, ch_report_template ) ch_versions = ch_versions.mix(COMPARE_STRUCTURES.out.versions) @@ -127,26 +117,21 @@ workflow POST_PROCESSING { ch_workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) ch_methods_description = channel.value(methodsDescriptionText(ch_multiqc_methods_description)) - ch_multiqc_files = channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + .mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + .mix(ch_collated_versions) MULTIQC ( ch_multiqc_rep .combine( ch_multiqc_files .collect() - .map { it -> [it] } + .map { [it] } ) .map { meta, report_files, multiqc_files -> [ meta, report_files + multiqc_files ] }, ch_multiqc_config, - ch_multiqc_custom_config - .collect() - .ifEmpty([]), - ch_multiqc_logo - .collect() - .ifEmpty([]), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), [], [] ) From 9c26e56e486417c66e7582f86fe9dd4a7a58c3a6 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 15:32:01 +1100 Subject: [PATCH 09/43] remove write_html since I don't think the intermediates have advantage of plotly saving from the actual report --- modules/local/generate_report/plot_utils.py | 309 -------------------- 1 file changed, 309 deletions(-) delete mode 100644 modules/local/generate_report/plot_utils.py diff --git a/modules/local/generate_report/plot_utils.py b/modules/local/generate_report/plot_utils.py deleted file mode 100644 index eaf9d02e4..000000000 --- a/modules/local/generate_report/plot_utils.py +++ /dev/null @@ -1,309 +0,0 @@ -import plotly.graph_objects as go -from Bio import PDB - -import numpy as np -import os - -def reset_residue_numbers(structure): - """ - Resets residue numbering in a PDB file, because ESMFold starts renumbering - at 1 for each chain and increments only when encountering a new residue. - """ - if str(structure).endswith(".pdb"): - parser = PDB.PDBParser(QUIET=True) - elif str(structure).endswith(".cif"): - parser = PDB.MMCIFParser(QUIET=True) - else: - raise ValueError(f"{structure} is neither a PDB or mmCIF file!") - - struct_obj = parser.get_structure("structure", structure) - - for model in struct_obj: - for chain in model: - for idx, residue in enumerate(chain.get_residues(), start=1): - # Do a swap in place to renumber the residue, the other entries in the tuple can stay the same - # See: https://biopython.org/docs/1.76/api/Bio.PDB.Chain.html#Bio.PDB.Chain.Chain.__getitem__ - het_atom, _, insertion_code = residue.get_id() - residue.id = (het_atom, idx, insertion_code) - - return struct_obj - -# TODO: Barcelona team to implement AF3 -def sort_structures_by_rank(structures, prog): - """ - Sorts a list of structures based on their rank. Handles different program naming conventions. - - Returns: - List of structure files sorted by rank (always returns list, even for single structures) - """ - if prog == "alphafold2": - # AlphaFold2 structures are named with [run]/ranked_[rank].pdb - sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).replace('ranked_', '').split('.')[0])) - elif prog == "colabfold": - # ColabFold structures are named with [run]_unrelaxed_rank_[rank]_alphafold2_ptm_model_[num]_seed_[seed].pdb - sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).split('_')[3])) - elif prog == "helixfold3": - # HelixFold3 structures are named with .../[run]/[run]-rank[rank]/predicted_structure.pdb - sorted_structures = sorted(structures, key=lambda x: int(os.path.dirname(x).split('rank')[-1])) - elif prog == "boltz": - # Boltz structures are named with ..._model_[diffusion_samples-1].[pdb|cif] - sorted_structures = sorted(structures, key=lambda x: int(os.path.basename(x).split('_model_')[-1].split('.')[0])) - elif prog == "esmfold" or prog == "rosettafold-all-atom": - # ESMFold and RoseTTAFold only produce one structure - sorted_structures = structures if isinstance(structures, list) else [structures] - else: - print(f"Warning: Sorting not implemented for {prog}. Using original order.") - sorted_structures = structures if isinstance(structures, list) else [structures] - - return sorted_structures if isinstance(sorted_structures, list) else [sorted_structures] - -def align_structures(structures): - """ - Align multiple structures against the first (reference) structure. - Uses common atoms for superimposition (handles cases where structures aren't complete). - - Returns: - List of BioPython structure objects aligned to the first structure - """ - if not structures: - raise ValueError("No structures provided for alignment.") - - if structures[0].endswith(".pdb"): - parser = PDB.PDBParser(QUIET=True) - elif structures[0].endswith(".cif"): - parser = PDB.MMCIFParser(QUIET=True) - else: - raise ValueError(f"{structures[0]} is neither a PDB or mmCIF file!") - - parsed_structures = [parser.get_structure(f"structure-{idx}", structure) for idx, structure in enumerate(structures)] - ref_structure = parsed_structures[0] - - def get_atom_ids(structure): - # Note: this is a *set* of atom_ids due to the {} surrounding the comprehension - return {(atom.get_parent().get_id(), atom.name) for atom in structure.get_atoms()} - - # Find common atoms across all structures (progressive intersection) - # This allows alignment even if structures are incomplete or have different atom coverage - common_atoms = get_atom_ids(ref_structure) - for structure in parsed_structures[1:]: - common_atoms.intersection_update(get_atom_ids(structure)) - - if not common_atoms: - raise ValueError("No common atoms found between structures for alignment.") - - def extract_atoms(structure, atom_ids): - # Must return a sorted list (not set) so ref/target atoms correspond positionally - atoms = [atom for atom in structure.get_atoms() if (atom.get_parent().get_id(), atom.name) in atom_ids] - return sorted(atoms, key=lambda a: (a.get_parent().get_id(), a.name)) - - ref_atoms = extract_atoms(ref_structure, common_atoms) - - # The aligned structures will be the parsed structures aligned to the common atoms of the reference structure - super_imposer = PDB.Superimposer() - aligned_structures = [ref_structure] # Reference needs no alignment - for idx, structure in enumerate(parsed_structures[1:], start=1): - target_atoms = extract_atoms(structure, common_atoms) - super_imposer.set_atoms(list(ref_atoms), list(target_atoms)) - super_imposer.apply(structure.get_atoms()) - aligned_structures.append(structure) - - return aligned_structures - -def plddt_from_struct_b_factor(structure): - """ - Extracts residue pLDDT values from the b-factor column using BioPython. - Accepts either a file path (str/Path) or a pre-parsed BioPython Structure object. - """ - if isinstance(structure, (str, os.PathLike)): - if str(structure).endswith(".pdb"): - parser = PDB.PDBParser(QUIET=True) - elif str(structure).endswith(".cif"): - parser = PDB.MMCIFParser(QUIET=True) - else: - raise ValueError(f"{structure} is neither a PDB or mmCIF file!") - struct_obj = parser.get_structure(os.path.basename(str(structure)), str(structure)) - else: - # Already a BioPython structure object - struct_obj = structure - - res_plddts = [] - - for model in struct_obj: - for chain in model: - for residue in chain: - atom_list = residue.get_unpacked_list() - atom_plddt_tot = 0 - # Handle both atom-wise and residue-wise pLDDT values - for atom in residue: - atom_plddt = atom.get_bfactor() - atom_plddt_tot += atom_plddt - - res_plddt = float(atom_plddt_tot / len(atom_list)) if atom_list else 0.0 - - # Ensure values are in [0, 100] range - if res_plddt < 1: - res_plddt *= 100 - - res_plddts.append(res_plddt) - - res_plddts = np.array(res_plddts) - res_plddts = np.round(res_plddts, 2) - - return res_plddts - -def generate_plddt_plot(structures): - """ - Generate a Plotly figure for pLDDT per position for given structures. - - Args: - structures (list): List of structure file paths or BioPython structure objects. - - Returns: - go.Figure: Plotly figure object with pLDDT data. - """ - plddt_per_struct = {} - - for idx, struct in enumerate(structures): - plddt_per_struct[f"rank-{idx}"] = plddt_from_struct_b_factor(struct) - - fig = go.Figure() - - for idx, (name, plddts) in enumerate(plddt_per_struct.items()): - fig.add_trace( - go.Scatter( - x=list(range(len(plddts))), - y=plddts, - mode="lines", - name=name, - text=[f"({pos}, {value:.2f})" for pos, value in enumerate(plddts)], - hoverinfo="text", - ) - ) - fig.update_layout( - title=dict(text="pLDDT per position", x=0.5, xanchor="center"), - xaxis=dict( - title="Positions", showline=True, linecolor="black", gridcolor="WhiteSmoke" - ), - yaxis=dict( - title="pLDDT", - range=[0, 100], - showline=True, - linecolor="black", - gridcolor="WhiteSmoke", - ), - legend=dict( - yanchor="bottom", y=0.02, xanchor="right", x=1, bordercolor="Black", borderwidth=1 - ), - plot_bgcolor="white", - width=600, - height=600, - ) - - return fig - -def process_msas(msa_path): - msa = np.loadtxt(msa_path, dtype=int) - - query_sequence = msa[0] - seqid_match = np.mean(msa == query_sequence, axis=1) - - # Sort sequences by sequence identity - seqid_sort_indices = np.argsort(seqid_match) - sorted_msa = msa[seqid_sort_indices] - sorted_seqid = seqid_match[seqid_sort_indices] - - non_gaps_msas = np.where(sorted_msa != 21, 1.0, np.nan) - - # Scale non-gap positions by sequence identity - final_msas = non_gaps_msas * sorted_seqid[:, None] - - return final_msas, non_gaps_msas - -def generate_sequence_coverage_plot(msa_path, out_dir, name, save_image=False): - """ - Generate an interactive Plotly heatmap for sequence coverage with depth overlay. - """ - final_msas, non_gaps_msas = process_msas(msa_path) - seq_depth_counts = np.sum(~np.isnan(non_gaps_msas), axis=0) - - # Create interactive Plotly figure - fig = go.Figure() - - # Add heatmap for sequence coverage - fig.add_trace( - go.Heatmap( - z=final_msas, - colorscale="Rainbow_r", - zmin=0, - zmax=1, - colorbar={"title": "Sequence
    identity"}, - name="", - ) - ) - - # Add black line for sequence coverage depth as secondary trace - fig.add_trace( - go.Scatter( - x=list(range(len(seq_depth_counts))), - y=seq_depth_counts, - mode="lines", - line=dict(color="black", width=2), - name="Coverage Depth", - yaxis="y2", - ) - ) - - # Update layout with dual y-axes - fig.update_layout( - title=dict(text="Sequence coverage", x=0.5, xanchor="center"), - xaxis_title="Positions", - yaxis_title="Sequences", - yaxis2=dict( - title="Coverage Depth", - overlaying="y", - side="right", - ), - width=800, - height=600, - ) - - if save_image: - image_path = f"{out_dir}/{name+('_' if name else '')}seq_coverage.png" - fig.write_image(image_path, width=800, height=600) - return fig, image_path - else: - return fig - -def generate_pae_plot(pae_path, out_dir, name, save_image=False): - """ - Generate an interactive Plotly heatmap for Predicted Aligned Error (PAE) data. - """ - pae = np.genfromtxt(pae_path, delimiter="\t") - max_pae = np.max(pae) - fig = go.Figure() - - # Add heatmap with green colorscale - fig.add_trace( - go.Heatmap( - z=pae, - colorscale="Greens_r", - zmin=0, - zmax=max_pae, - colorbar={"title": "PAE (Å)"}, - ) - ) - - fig.update_layout( - title=dict(text="Predicted Aligned Error", x=0.5, xanchor="center"), - xaxis=dict(title="Scored Residue"), - yaxis=dict(title="Aligned Residue"), - width=800, - height=800, - ) - - if save_image: - image_path = f"{out_dir}/{name+('_' if name else '')}pae.png" - fig.write_image(image_path, width=800, height=800) - return fig, image_path - else: - return fig From 461f3bb2da8de0762f2fe22c8f36e949767baa1b Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 16:00:00 +1100 Subject: [PATCH 10/43] remove unused multiqc channel which used to just plot plddt, and blocks the way for PR #450 --- main.nf | 12 +--------- modules/local/colabfold_batch/main.nf | 2 +- modules/local/run_alphafold2/main.nf | 2 +- modules/local/run_alphafold2_pred/main.nf | 2 +- modules/local/run_alphafold3/main.nf | 2 +- modules/local/run_boltz/main.nf | 2 +- modules/local/run_esmfold/main.nf | 2 +- modules/local/run_helixfold3/main.nf | 2 +- modules/local/run_rosettafold2na/main.nf | 2 +- .../local/run_rosettafold_all_atom/main.nf | 2 +- subworkflows/local/post_processing.nf | 9 +------- workflows/alphafold2.nf | 22 ------------------- workflows/alphafold3.nf | 13 ----------- workflows/boltz.nf | 9 -------- workflows/colabfold.nf | 12 ---------- workflows/esmfold.nf | 11 ---------- workflows/helixfold3.nf | 12 ---------- workflows/rosettafold2na.nf | 12 ---------- workflows/rosettafold_all_atom.nf | 12 ---------- 19 files changed, 11 insertions(+), 131 deletions(-) diff --git a/main.nf b/main.nf index 361bf34e9..0a78985dd 100644 --- a/main.nf +++ b/main.nf @@ -67,7 +67,6 @@ workflow NFCORE_PROTEINFOLD { main: ch_samplesheet = samplesheet - ch_multiqc = channel.empty() ch_versions = channel.empty() ch_report_input = channel.empty() ch_top_ranked_model = channel.empty() @@ -136,7 +135,6 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, PREPARE_ALPHAFOLD2_DBS.out.uniprot ) - ch_multiqc = ch_multiqc.mix(ALPHAFOLD2.out.multiqc_report.collect()) ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) ch_report_input = ch_report_input .mix( @@ -195,7 +193,6 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ALPHAFOLD3_DBS.out.uniprot ) - ch_multiqc = ch_multiqc.mix(ALPHAFOLD3.out.multiqc_report) ch_versions = ch_versions.mix(ALPHAFOLD3.out.versions) ch_report_input = ch_report_input .mix( @@ -240,7 +237,6 @@ workflow NFCORE_PROTEINFOLD { params.colabfold_num_recycles ) - ch_multiqc = ch_multiqc.mix(COLABFOLD.out.multiqc_report) ch_versions = ch_versions.mix(COLABFOLD.out.versions) ch_report_input = ch_report_input .mix( @@ -279,7 +275,6 @@ workflow NFCORE_PROTEINFOLD { params.esmfold_num_recycles ) - ch_multiqc = ch_multiqc.mix(ESMFOLD.out.multiqc_report.collect()) ch_versions = ch_versions.mix(ESMFOLD.out.versions) ch_report_input = ch_report_input .mix( @@ -324,7 +319,6 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.rfaa_paper_weights ) - ch_multiqc = ch_multiqc.mix(ROSETTAFOLD_ALL_ATOM.out.multiqc_report.collect()) ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) ch_report_input = ch_report_input .mix( @@ -396,7 +390,6 @@ workflow NFCORE_PROTEINFOLD { PREPARE_HELIXFOLD3_DBS.out.helixfold3_init_models, PREPARE_HELIXFOLD3_DBS.out.helixfold3_maxit_src ) - ch_multiqc = ch_multiqc.mix(HELIXFOLD3.out.multiqc_report.collect()) ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) ch_report_input = ch_report_input .mix( @@ -446,7 +439,6 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ROSETTAFOLD2NA_DBS.out.rna, PREPARE_ROSETTAFOLD2NA_DBS.out.rosettafold2na_weights ) - ch_multiqc = ch_multiqc.mix(ROSETTAFOLD2NA.out.multiqc_report.collect()) ch_versions = ch_versions.mix(ROSETTAFOLD2NA.out.versions) ch_report_input = ch_report_input .mix( @@ -501,7 +493,6 @@ workflow NFCORE_PROTEINFOLD { PREPARE_COLABFOLD_DBS_BOLTZ.out.uniref30, params.use_msa_server ) - ch_multiqc = ch_multiqc.mix(BOLTZ.out.multiqc_report) ch_versions = ch_versions.mix(BOLTZ.out.versions) ch_report_input = ch_report_input .mix( @@ -531,7 +522,6 @@ workflow NFCORE_PROTEINFOLD { params.skip_multiqc, params.outdir, ch_versions, - ch_multiqc, ch_multiqc_config, ch_multiqc_custom_config, ch_multiqc_logo, @@ -540,7 +530,7 @@ workflow NFCORE_PROTEINFOLD { ) emit: - multiqc_report = ch_multiqc + multiqc_report = POST_PROCESSING.out.multiqc_report } /* diff --git a/modules/local/colabfold_batch/main.nf b/modules/local/colabfold_batch/main.nf index ee567e2d8..bef7f7168 100644 --- a/modules/local/colabfold_batch/main.nf +++ b/modules/local/colabfold_batch/main.nf @@ -18,7 +18,7 @@ process COLABFOLD_BATCH { tuple val(meta), path ("${meta.id}_colabfold.pdb") , emit: top_ranked_pdb tuple val(meta), path ("raw/*relaxed_rank_*.pdb") , emit: pdb tuple val(meta), path ("${meta.id}_colabfold_msa.tsv") , emit: msa - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes tuple val(meta), path ("${meta.id}_0_pae.tsv") , optional: true, emit: pae tuple val(meta), path ("${meta.id}_ptm.tsv") , optional: true, emit: ptms diff --git a/modules/local/run_alphafold2/main.nf b/modules/local/run_alphafold2/main.nf index eea900702..73f8f4f8c 100644 --- a/modules/local/run_alphafold2/main.nf +++ b/modules/local/run_alphafold2/main.nf @@ -29,7 +29,7 @@ process RUN_ALPHAFOLD2 { path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_alphafold2.pdb") , emit: top_ranked_pdb tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_alphafold2_msa.tsv") , emit: msa // Note: alphafold2_model_preset == "monomer" the pae file won't exist, thus the optional tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes diff --git a/modules/local/run_alphafold2_pred/main.nf b/modules/local/run_alphafold2_pred/main.nf index 30a581a32..03f7c6f1b 100644 --- a/modules/local/run_alphafold2_pred/main.nf +++ b/modules/local/run_alphafold2_pred/main.nf @@ -28,7 +28,7 @@ process RUN_ALPHAFOLD2_PRED { tuple val(meta), path ("${meta.id}_alphafold2.pdb") , emit: top_ranked_pdb tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb tuple val(meta), path ("${meta.id}_alphafold2_msa.tsv") , emit: msa - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt //Note: alphafold2_model_preset == "monomer" the pae file won't exist. tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes tuple val(meta), path ("${meta.id}_0_pae.tsv") , optional: true, emit: pae diff --git a/modules/local/run_alphafold3/main.nf b/modules/local/run_alphafold3/main.nf index 48e38815e..1d5636957 100644 --- a/modules/local/run_alphafold3/main.nf +++ b/modules/local/run_alphafold3/main.nf @@ -21,7 +21,7 @@ process RUN_ALPHAFOLD3 { path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_alphafold3.cif") , emit: top_ranked_cif tuple val(meta), path ("raw/*ranked_*.cif") , emit: cif - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_alphafold3_msa.tsv") , emit: msa tuple val(meta), path ("${meta.id}_0_pae.tsv") , emit: pae tuple val(meta), path ("${meta.id}_ptm.tsv") , emit: ptms diff --git a/modules/local/run_boltz/main.nf b/modules/local/run_boltz/main.nf index 0960aa4e9..23d23b3f6 100644 --- a/modules/local/run_boltz/main.nf +++ b/modules/local/run_boltz/main.nf @@ -21,7 +21,7 @@ process RUN_BOLTZ { tuple val(meta), path ("boltz_results_*/processed/msa/*.npz") , emit: msa tuple val(meta), path ("boltz_results_*/processed/structures/*.npz") , emit: structures tuple val(meta), path ("boltz_results_*/predictions/*/confidence*.json") , emit: confidence - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_boltz.pdb") , emit: top_ranked_pdb tuple val(meta), path ("boltz_results_*/predictions/*/*.pdb") , emit: pdb tuple val(meta), path ("boltz_results_*/predictions/*/plddt_*model_0.npz") , emit: plddt diff --git a/modules/local/run_esmfold/main.nf b/modules/local/run_esmfold/main.nf index dc4394c75..767d65082 100644 --- a/modules/local/run_esmfold/main.nf +++ b/modules/local/run_esmfold/main.nf @@ -13,7 +13,7 @@ process RUN_ESMFOLD { output: tuple val(meta), path ("${meta.id}_esmfold.pdb") , emit: top_ranked_pdb tuple val(meta), path ("*.pdb") , emit: pdb - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt path "versions.yml" , emit: versions when: diff --git a/modules/local/run_helixfold3/main.nf b/modules/local/run_helixfold3/main.nf index b22417775..5ee5962d2 100644 --- a/modules/local/run_helixfold3/main.nf +++ b/modules/local/run_helixfold3/main.nf @@ -30,7 +30,7 @@ process RUN_HELIXFOLD3 { tuple val(meta), path ("${meta.id}_helixfold3.pdb") , emit: top_ranked_pdb tuple val(meta), path ("${meta.id}_helixfold3.cif") , emit: main_cif tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_helixfold3_msa.tsv") , emit: msa // If ${meta.id}-rank*/all_results.json" doesn't have PAE vales in the key, this will be empty tuple val(meta), path ("${meta.id}_1_pae.tsv") , emit: pae diff --git a/modules/local/run_rosettafold2na/main.nf b/modules/local/run_rosettafold2na/main.nf index 2650610e7..aeeec467a 100644 --- a/modules/local/run_rosettafold2na/main.nf +++ b/modules/local/run_rosettafold2na/main.nf @@ -20,7 +20,7 @@ process RUN_ROSETTAFOLD2NA { path ("raw/**") , emit: raw tuple val(meta), path("${meta.id}_rosettafold2na.pdb") , emit: top_ranked_pdb tuple val(meta), path("raw/*.pdb") , emit: pdb - tuple val(meta), path("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path("${meta.id}_rosettafold2na_msa.tsv") , emit: msa tuple val(meta), path("${meta.id}_0_pae.tsv") , emit: pae path "versions.yml" , emit: versions diff --git a/modules/local/run_rosettafold_all_atom/main.nf b/modules/local/run_rosettafold_all_atom/main.nf index 2c1147fa0..00154c388 100644 --- a/modules/local/run_rosettafold_all_atom/main.nf +++ b/modules/local/run_rosettafold_all_atom/main.nf @@ -20,7 +20,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { output: path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_rosettafold_all_atom.pdb") , emit: pdb - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: plddt tuple val(meta), path ("${meta.id}_rosettafold_all_atom_msa.tsv") , emit: msa // I think there should always be PAE from the .pt PyTorch model. extract_metrics.py has condition import torch to handle this tuple val(meta), path ("${meta.id}_*_pae.tsv") , emit: paes diff --git a/subworkflows/local/post_processing.nf b/subworkflows/local/post_processing.nf index 119de6aea..24ef5aa0b 100644 --- a/subworkflows/local/post_processing.nf +++ b/subworkflows/local/post_processing.nf @@ -29,7 +29,6 @@ workflow POST_PROCESSING { skip_multiqc outdir ch_versions - ch_multiqc_rep ch_multiqc_config ch_multiqc_custom_config ch_multiqc_logo @@ -122,13 +121,7 @@ workflow POST_PROCESSING { .mix(ch_collated_versions) MULTIQC ( - ch_multiqc_rep - .combine( - ch_multiqc_files - .collect() - .map { [it] } - ) - .map { meta, report_files, multiqc_files -> [ meta, report_files + multiqc_files ] }, + ch_multiqc_files.collect().map { [[id: "proteinfold", model: "proteinfold"], it] }, ch_multiqc_config, ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 1c17cea1c..7b5939e1b 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -49,7 +49,6 @@ workflow ALPHAFOLD2 { ch_top_ranked_pdb = channel.empty() ch_msa = channel.empty() ch_pae = channel.empty() - ch_multiqc_report = channel.empty() if (alphafold2_model_preset != 'multimer') { ch_samplesheet @@ -83,16 +82,6 @@ workflow ALPHAFOLD2 { ch_uniprot ) - RUN_ALPHAFOLD2 - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "alphafold2" ], it.flatten() ] - } - .set { ch_multiqc_report } - ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2.out.pdb) ch_top_ranked_pdb = ch_top_ranked_pdb.mix(RUN_ALPHAFOLD2.out.top_ranked_pdb) ch_msa = ch_msa.mix(RUN_ALPHAFOLD2.out.msa) @@ -143,16 +132,6 @@ workflow ALPHAFOLD2 { ch_uniprot ) - RUN_ALPHAFOLD2_PRED - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "alphafold2" ], it.flatten() ] - } - .set { ch_multiqc_report } - ch_top_ranked_pdb = ch_top_ranked_pdb.mix(RUN_ALPHAFOLD2_PRED.out.top_ranked_pdb) ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2_PRED.out.pdb) ch_msa = ch_msa.mix(RUN_ALPHAFOLD2_PRED.out.msa) @@ -197,7 +176,6 @@ workflow ALPHAFOLD2 { pdb = ch_pdb_final // channel: [ meta, /path/to/*.pdb ] msa = ch_msa_final // channel: [ meta, /path/to/*.pdb, /path/to/*_coverage.png ] // Would prefer channel: [ meta, /path/to/*_msa.tsv ] pae = ch_pae_final // channel: [ meta, /path/to/*_0_pae.tsv] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/alphafold3.nf b/workflows/alphafold3.nf index 84ff42186..a42a0de19 100644 --- a/workflows/alphafold3.nf +++ b/workflows/alphafold3.nf @@ -41,7 +41,6 @@ workflow ALPHAFOLD3 { ch_pdb_final = channel.empty() ch_top_ranked_pdb = channel.empty() ch_msa_final = channel.empty() - ch_multiqc_report = channel.empty() FASTA_TO_ALPHAFOLD3_JSON(ch_samplesheet) ch_versions = ch_versions.mix(FASTA_TO_ALPHAFOLD3_JSON.out.versions) @@ -124,17 +123,6 @@ workflow ALPHAFOLD3 { } .set { ch_msa_final } - // Prepare report input - RUN_ALPHAFOLD3 - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "alphafold3" ], it.flatten() ] - } - .set { ch_multiqc_report } - // Prepare dummy pae input RUN_ALPHAFOLD3 .out @@ -151,7 +139,6 @@ workflow ALPHAFOLD3 { pdb = ch_pdb_final // channel: [ meta, /path/to/*.pdb, ...,/path/to/*.pdb ] msa = ch_msa_final // channel: [ meta, /path/to/*.pdb, /path/to/*_coverage.png ] pae = ch_pae_final // channel: [ meta, path/to/*_pae.tsv ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/boltz.nf b/workflows/boltz.nf index de82b05e7..5fafb3664 100644 --- a/workflows/boltz.nf +++ b/workflows/boltz.nf @@ -175,14 +175,6 @@ workflow BOLTZ { } .set { ch_pae } - RUN_BOLTZ - .out - .multiqc - .map { it -> it[1] } - .collect(sort: true) - .map { it -> [ [ "model": "boltz"], it.flatten() ] } - .set { ch_multiqc_report } - ch_versions = ch_versions.mix(RUN_BOLTZ.out.versions) emit: @@ -190,7 +182,6 @@ workflow BOLTZ { msa = ch_msa structures = RUN_BOLTZ.out.structures confidence = RUN_BOLTZ.out.confidence - multiqc_report = ch_multiqc_report top_ranked_pdb = ch_top_ranked_pdb pdb = ch_pdb pae = ch_pae diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index 312a22b4a..9d4dd7884 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -37,7 +37,6 @@ workflow COLABFOLD { num_recycles // int: Number of recycles for colabfold main: - ch_multiqc_report = channel.empty() if (params.use_msa_server) { // @@ -113,22 +112,11 @@ workflow COLABFOLD { modeChannel(COLABFOLD_BATCH.out.msa, "colabfold").set { ch_msa_final } modeChannel(COLABFOLD_BATCH.out.pae, "colabfold").set { ch_pae_final } - COLABFOLD_BATCH - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model":"colabfold"], it.flatten() ] - } - .set { ch_multiqc_report } - emit: top_ranked_pdb = ch_top_ranked_pdb // channel: [ meta, /path/to/*.pdb ] pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] msa = ch_msa_final // channel: [ meta, /path/to/*.pdb, /path/to/*_coverage.png ] pae = ch_pae_final // channel: [ id, /path/to/*_pae.tsv ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/esmfold.nf b/workflows/esmfold.nf index 5a221d986..c6af5f1ba 100644 --- a/workflows/esmfold.nf +++ b/workflows/esmfold.nf @@ -56,21 +56,10 @@ workflow ESMFOLD { ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions) } - RUN_ESMFOLD - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "esmfold"], it.flatten() ] - } - .set { ch_multiqc_report } - modeChannel(RUN_ESMFOLD.out.pdb, "esmfold").set { ch_pdb_final } emit: pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/helixfold3.nf b/workflows/helixfold3.nf index 9defb5c43..cb6de9a86 100644 --- a/workflows/helixfold3.nf +++ b/workflows/helixfold3.nf @@ -47,7 +47,6 @@ workflow HELIXFOLD3 { main: ch_pdb = channel.empty() ch_top_ranked_pdb = channel.empty() - ch_multiqc_report = channel.empty() // // SUBWORKFLOW: Run helixfold3 @@ -77,16 +76,6 @@ workflow HELIXFOLD3 { ch_helixfold3_maxit_src ) - RUN_HELIXFOLD3 - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "helixfold3" ], it.flatten() ] - } - .set { ch_multiqc_report } - ch_pdb = ch_pdb.mix(RUN_HELIXFOLD3.out.pdb) ch_versions = ch_versions.mix(RUN_HELIXFOLD3.out.versions) @@ -119,7 +108,6 @@ workflow HELIXFOLD3 { pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] msa = ch_msa_final // channel: [ id, /path/to/*_msa.tsv ] pae = ch_pae_final // channel: [ id, /path/to/*_pae.tsv ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/rosettafold2na.nf b/workflows/rosettafold2na.nf index dfcf92861..3997f3a02 100644 --- a/workflows/rosettafold2na.nf +++ b/workflows/rosettafold2na.nf @@ -28,7 +28,6 @@ workflow ROSETTAFOLD2NA { ch_rosettafold2na_weights // channel: path(rosettafold2na_weights) main: - ch_multiqc_report = channel.empty() ROSETTAFOLD2NA_FASTA( ch_samplesheet @@ -45,16 +44,6 @@ workflow ROSETTAFOLD2NA { ) ch_versions = ch_versions.mix(RUN_ROSETTAFOLD2NA.out.versions) - RUN_ROSETTAFOLD2NA - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "rosettafold2na" ], it.flatten() ] - } - .set { ch_multiqc_report } - RUN_ROSETTAFOLD2NA .out .pdb @@ -89,7 +78,6 @@ workflow ROSETTAFOLD2NA { pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] pae = ch_pae_final // channel: [ id, /path/to/*_pae.tsv ] msa = ch_msa_final // channel: [ id, /path/to/*_msa.tsv ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/rosettafold_all_atom.nf b/workflows/rosettafold_all_atom.nf index bd576087b..0d31fa49c 100644 --- a/workflows/rosettafold_all_atom.nf +++ b/workflows/rosettafold_all_atom.nf @@ -36,7 +36,6 @@ workflow ROSETTAFOLD_ALL_ATOM { ch_rfaa_paper_weights // channel: path(rfaa_paper_weightsch_dummy_file // channel: path(NO_file) main: - ch_multiqc_report = channel.empty() ch_samplesheet.branch { it -> fasta: it[1].extension == "fasta" || it[1].extension == "fa" @@ -64,16 +63,6 @@ workflow ROSETTAFOLD_ALL_ATOM { ) ch_versions = ch_versions.mix(RUN_ROSETTAFOLD_ALL_ATOM.out.versions) - RUN_ROSETTAFOLD_ALL_ATOM - .out - .multiqc - .map { it -> it[1] } - .toSortedList() - .map { it -> - [ [ "model": "rosettafold_all_atom" ], it.flatten() ] - } - .set { ch_multiqc_report } - modeChannel(RUN_ROSETTAFOLD_ALL_ATOM.out.pdb, "rosettafold_all_atom").set { ch_pdb_final } modeChannel(RUN_ROSETTAFOLD_ALL_ATOM.out.msa, "rosettafold_all_atom").set { ch_msa_final } modeChannel(RUN_ROSETTAFOLD_ALL_ATOM.out.pae, "rosettafold_all_atom").set { ch_pae_final } @@ -82,7 +71,6 @@ workflow ROSETTAFOLD_ALL_ATOM { pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] msa = ch_msa_final // channel: [ id, /path/to/*_msa.tsv ] pae = ch_pae_final // channel: [ id, /path/to/*_pae.tsv ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } From 8c2edcd54be727f959f039f3d860c65ad7967a84 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 16:04:16 +1100 Subject: [PATCH 11/43] Use modeChannel() everywhere to stop exploding verbosity just to map meta.mode to each output --- .../utils_nfcore_proteinfold_pipeline/main.nf | 5 ++- workflows/alphafold2.nf | 37 +++------------- workflows/alphafold3.nf | 43 +++--------------- workflows/boltz.nf | 44 +++---------------- workflows/colabfold.nf | 23 +--------- workflows/helixfold3.nf | 23 +--------- workflows/rosettafold2na.nf | 34 +++----------- 7 files changed, 29 insertions(+), 180 deletions(-) diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf index acac78d49..16a481bbb 100644 --- a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf @@ -220,11 +220,12 @@ def getColabfoldAlphafold2ParamsPath() { return path } -def modeChannel(ch, mode) { +def modeChannel(ch, mode, asList = false) { return ch.map { meta, value -> def meta_clone = meta.clone() meta_clone.model = mode - [ meta_clone, value ] + def v = asList ? ((value instanceof List) ? value : [value]) : value + [ meta_clone, v ] } } diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 7b5939e1b..584e69e91 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -11,6 +11,8 @@ include { RUN_ALPHAFOLD2 } from '../modules/local/run_alphafold2' include { RUN_ALPHAFOLD2_MSA } from '../modules/local/run_alphafold2_msa' include { RUN_ALPHAFOLD2_PRED } from '../modules/local/run_alphafold2_pred' +include { modeChannel } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -139,37 +141,10 @@ workflow ALPHAFOLD2 { ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) } - ch_pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold2"; - def files = (it[1] instanceof List) ? it[1] : [ it[1] ] - [ meta, files ] - } - .set { ch_pdb_final } - - ch_msa - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold2"; - [ meta, it[1] ] - } - .set { ch_msa_final } - - ch_pae - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold2"; - [ meta, it[1] ] - } - .set { ch_pae_final } - - ch_top_ranked_pdb_final = ch_top_ranked_pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold2"; - [ meta, it[1] ] - } + modeChannel(ch_pdb, "alphafold2", true).set { ch_pdb_final } + modeChannel(ch_msa, "alphafold2").set { ch_msa_final } + modeChannel(ch_pae, "alphafold2").set { ch_pae_final } + ch_top_ranked_pdb_final = modeChannel(ch_top_ranked_pdb, "alphafold2") emit: top_ranked_pdb = ch_top_ranked_pdb_final // channel: [ meta, /path/to/*.pdb ] diff --git a/workflows/alphafold3.nf b/workflows/alphafold3.nf index a42a0de19..cd5deaf0e 100644 --- a/workflows/alphafold3.nf +++ b/workflows/alphafold3.nf @@ -12,6 +12,8 @@ include { RUN_ALPHAFOLD3 } from '../modules/local/run_alphafo include { MMCIF2PDB as MMCIF2PDB_TOP_RANKED } from '../modules/local/mmcif2pdb/main.nf' include { MMCIF2PDB as MMCIF2PDB_MODELS } from '../modules/local/mmcif2pdb/main.nf' +include { modeChannel } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -83,16 +85,7 @@ workflow ALPHAFOLD3 { ) ch_versions = ch_versions.mix(MMCIF2PDB_MODELS.out.versions) - MMCIF2PDB_MODELS - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold3"; - def files = (it[1] instanceof List) ? it[1] : [ it[1] ] - [ meta, files ] - } - .set { ch_pdb_final } + modeChannel(MMCIF2PDB_MODELS.out.pdb, "alphafold3", true).set { ch_pdb_final } // Convert top ranked mmcif to pdb MMCIF2PDB_TOP_RANKED ( @@ -102,37 +95,13 @@ workflow ALPHAFOLD3 { ) ch_versions = ch_versions.mix(MMCIF2PDB_TOP_RANKED.out.versions) - MMCIF2PDB_TOP_RANKED - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold3"; - [ meta, it[1] ] - } - .set { ch_top_ranked_pdb } + modeChannel(MMCIF2PDB_TOP_RANKED.out.pdb, "alphafold3").set { ch_top_ranked_pdb } // Prepare msa input - RUN_ALPHAFOLD3 - .out - .msa - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold3"; - [ meta, it[1] ] - } - .set { ch_msa_final } + modeChannel(RUN_ALPHAFOLD3.out.msa, "alphafold3").set { ch_msa_final } // Prepare dummy pae input - RUN_ALPHAFOLD3 - .out - .pae - .map { it -> - def meta = it[0].clone(); - meta.model = "alphafold3"; - [ meta, it[1] ] - } - .set { ch_pae_final } + modeChannel(RUN_ALPHAFOLD3.out.pae, "alphafold3").set { ch_pae_final } emit: top_ranked_pdb = ch_top_ranked_pdb // channel: [ id, /path/to/*.pdb ] diff --git a/workflows/boltz.nf b/workflows/boltz.nf index 5fafb3664..f9f1a2e4e 100644 --- a/workflows/boltz.nf +++ b/workflows/boltz.nf @@ -29,6 +29,7 @@ include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { modeChannel } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' // // MODULE: Boltz @@ -135,45 +136,10 @@ workflow BOLTZ { ch_mols ) - RUN_BOLTZ - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "boltz" - [ meta, it[1] ] - } - .set {ch_pdb} - - RUN_BOLTZ - .out - .top_ranked_pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "boltz" - [ meta, it[1] ] - } - .set { ch_top_ranked_pdb } - - RUN_BOLTZ - .out - .msa_raw - .map { it -> - def meta = it[0].clone(); - meta.model = "boltz" - [ meta, it[1] ] - } - .set { ch_msa } - - RUN_BOLTZ - .out - .pae_raw - .map { it -> - def meta = it[0].clone(); - meta.model = "boltz" - [ meta, it[1] ] - } - .set { ch_pae } + modeChannel(RUN_BOLTZ.out.pdb, "boltz").set { ch_pdb } + modeChannel(RUN_BOLTZ.out.top_ranked_pdb, "boltz").set { ch_top_ranked_pdb } + modeChannel(RUN_BOLTZ.out.msa_raw, "boltz").set { ch_msa } + modeChannel(RUN_BOLTZ.out.pae_raw, "boltz").set { ch_pae } ch_versions = ch_versions.mix(RUN_BOLTZ.out.versions) diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index 9d4dd7884..33be42113 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -88,27 +88,8 @@ workflow COLABFOLD { ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) } - COLABFOLD_BATCH - .out - .top_ranked_pdb - .map { it -> - def meta_clone = it[0].clone(); - meta_clone.model = "colabfold"; - [ meta_clone, it[1] ] - } - .set { ch_top_ranked_pdb } - - COLABFOLD_BATCH - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "colabfold"; - def files = (it[1] instanceof List) ? it[1] : [ it[1] ] - [ meta, files ] - } - .set { ch_pdb_final } - + modeChannel(COLABFOLD_BATCH.out.top_ranked_pdb, "colabfold").set { ch_top_ranked_pdb } + modeChannel(COLABFOLD_BATCH.out.pdb, "colabfold", true).set { ch_pdb_final } modeChannel(COLABFOLD_BATCH.out.msa, "colabfold").set { ch_msa_final } modeChannel(COLABFOLD_BATCH.out.pae, "colabfold").set { ch_pae_final } diff --git a/workflows/helixfold3.nf b/workflows/helixfold3.nf index cb6de9a86..d506c26dd 100644 --- a/workflows/helixfold3.nf +++ b/workflows/helixfold3.nf @@ -79,27 +79,8 @@ workflow HELIXFOLD3 { ch_pdb = ch_pdb.mix(RUN_HELIXFOLD3.out.pdb) ch_versions = ch_versions.mix(RUN_HELIXFOLD3.out.versions) - RUN_HELIXFOLD3 - .out - .top_ranked_pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "helixfold3"; - [ meta, it[1] ] - } - .set { ch_top_ranked_pdb } - - RUN_HELIXFOLD3 - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "helixfold3"; - def files = (it[1] instanceof List) ? it[1] : [ it[1] ] - [ meta, files ] - } - .set { ch_pdb_final } - + modeChannel(RUN_HELIXFOLD3.out.top_ranked_pdb, "helixfold3").set { ch_top_ranked_pdb } + modeChannel(RUN_HELIXFOLD3.out.pdb, "helixfold3", true).set { ch_pdb_final } modeChannel(RUN_HELIXFOLD3.out.msa, "helixfold3").set { ch_msa_final } modeChannel(RUN_HELIXFOLD3.out.pae, "helixfold3").set { ch_pae_final } diff --git a/workflows/rosettafold2na.nf b/workflows/rosettafold2na.nf index 3997f3a02..5d20cc99a 100644 --- a/workflows/rosettafold2na.nf +++ b/workflows/rosettafold2na.nf @@ -10,6 +10,8 @@ include { ROSETTAFOLD2NA_FASTA } from '../modules/local/rosettafold2na_fasta' include { RUN_ROSETTAFOLD2NA } from '../modules/local/run_rosettafold2na' +include { modeChannel } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -44,35 +46,9 @@ workflow ROSETTAFOLD2NA { ) ch_versions = ch_versions.mix(RUN_ROSETTAFOLD2NA.out.versions) - RUN_ROSETTAFOLD2NA - .out - .pdb - .map { it -> - def meta = it[0].clone(); - meta.model = "rosettafold2na"; - [ meta, it[1] ] - } - .set { ch_pdb_final } - - RUN_ROSETTAFOLD2NA - .out - .pae - .map { it -> - def meta = it[0].clone(); - meta.model = "rosettafold2na"; - [ meta, it[1] ] - } - .set { ch_pae_final } - - RUN_ROSETTAFOLD2NA - .out - .msa - .map { it -> - def meta = it[0].clone(); - meta.model = "rosettafold2na"; - [ meta, it[1] ] - } - .set { ch_msa_final } + modeChannel(RUN_ROSETTAFOLD2NA.out.pdb, "rosettafold2na").set { ch_pdb_final } + modeChannel(RUN_ROSETTAFOLD2NA.out.pae, "rosettafold2na").set { ch_pae_final } + modeChannel(RUN_ROSETTAFOLD2NA.out.msa, "rosettafold2na").set { ch_msa_final } emit: pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] From 2cabebc4913c96dc34b1d7f9e07d5d65854bf906 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 16:11:35 +1100 Subject: [PATCH 12/43] radically reduce post_processing put channels by just accessing params directly --- main.nf | 16 +----------- subworkflows/local/post_processing.nf | 35 ++++++++++++--------------- 2 files changed, 16 insertions(+), 35 deletions(-) diff --git a/main.nf b/main.nf index 0a78985dd..ebba345bf 100644 --- a/main.nf +++ b/main.nf @@ -505,27 +505,13 @@ workflow NFCORE_PROTEINFOLD { // // POST PROCESSING: generate visualisation reports // - ch_multiqc_config = channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first() - ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first() : channel.empty() - ch_multiqc_logo = params.multiqc_logo ? channel.fromPath( params.multiqc_logo ).first() : channel.empty() - ch_multiqc_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_report_template = channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true)) + ch_report_template = channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true)) POST_PROCESSING( - params.skip_visualisation, requested_modes_size, ch_report_input, ch_report_template, - params.skip_foldseek, - params.foldseek_db, - params.foldseek_db_path, - params.skip_multiqc, - params.outdir, ch_versions, - ch_multiqc_config, - ch_multiqc_custom_config, - ch_multiqc_logo, - ch_multiqc_methods_description, ch_top_ranked_model ) diff --git a/subworkflows/local/post_processing.nf b/subworkflows/local/post_processing.nf index 24ef5aa0b..3c819e295 100644 --- a/subworkflows/local/post_processing.nf +++ b/subworkflows/local/post_processing.nf @@ -19,26 +19,16 @@ include { MULTIQC } from '../../modules/nf-core/multiqc/main' workflow POST_PROCESSING { take: - skip_visualisation requested_modes_size ch_report_input ch_report_template - skip_foldseek - foldseek_db - foldseek_db_path - skip_multiqc - outdir ch_versions - ch_multiqc_config - ch_multiqc_custom_config - ch_multiqc_logo - ch_multiqc_methods_description ch_top_ranked_model main: ch_comparison_report_files = channel.empty() - if (!skip_visualisation){ + if (!params.skip_visualisation){ ch_report_input .multiMap { meta, pdbs, msa, pae -> full: [meta, pdbs, msa, pae] @@ -82,12 +72,12 @@ workflow POST_PROCESSING { } } - if (!skip_foldseek) { + if (!params.skip_foldseek) { ch_foldseek_db = channel.value([ [ - id: foldseek_db, + id: params.foldseek_db, ], - file(foldseek_db_path, checkIfExists: true) + file(params.foldseek_db_path, checkIfExists: true) ]) FOLDSEEK_EASYSEARCH( ch_top_ranked_model, @@ -100,7 +90,7 @@ workflow POST_PROCESSING { // softwareVersionsToYAML(ch_versions) .collectFile( - storeDir: "${outdir}/pipeline_info", + storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_' + 'proteinfold_software_' + 'mqc_' + 'versions.yml', sort: true, newLine: true @@ -111,17 +101,22 @@ workflow POST_PROCESSING { // ch_multiqc_report = channel.empty() - if (!skip_multiqc) { - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) - ch_methods_description = channel.value(methodsDescriptionText(ch_multiqc_methods_description)) + if (!params.skip_multiqc) { + ch_multiqc_config = channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first() + ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath(params.multiqc_config).first() : channel.empty() + ch_multiqc_logo = params.multiqc_logo ? channel.fromPath(params.multiqc_logo).first() : channel.empty() + ch_multiqc_methods_desc = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) + ch_methods_description = channel.value(methodsDescriptionText(ch_multiqc_methods_desc)) ch_multiqc_files = ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') .mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) .mix(ch_collated_versions) MULTIQC ( - ch_multiqc_files.collect().map { [[id: "proteinfold", model: "proteinfold"], it] }, + ch_multiqc_files.collect().map { [[id: 'proteinfold', model: 'proteinfold'], it] }, ch_multiqc_config, ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), From 42722b12cd11960eb80849c0925bf0ffbc88fd6b Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 16:16:53 +1100 Subject: [PATCH 13/43] Use modeChannel to create list even for single files, god I love modeChannel to simpligy everything --- main.nf | 6 +++--- workflows/esmfold.nf | 2 +- workflows/rosettafold2na.nf | 2 +- workflows/rosettafold_all_atom.nf | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index ebba345bf..3c046cc8b 100644 --- a/main.nf +++ b/main.nf @@ -278,7 +278,7 @@ workflow NFCORE_PROTEINFOLD { ch_versions = ch_versions.mix(ESMFOLD.out.versions) ch_report_input = ch_report_input .mix( - ESMFOLD.out.pdb.map { meta, pdb -> [meta, [pdb]] } + ESMFOLD.out.pdb .combine(ch_dummy_msa) .combine(ch_dummy_pae) ) @@ -322,7 +322,7 @@ workflow NFCORE_PROTEINFOLD { ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) ch_report_input = ch_report_input .mix( - ROSETTAFOLD_ALL_ATOM.out.pdb.map { meta, pdb -> [meta, [pdb]] } + ROSETTAFOLD_ALL_ATOM.out.pdb .join(ROSETTAFOLD_ALL_ATOM.out.msa) .join(ROSETTAFOLD_ALL_ATOM.out.pae) ) @@ -442,7 +442,7 @@ workflow NFCORE_PROTEINFOLD { ch_versions = ch_versions.mix(ROSETTAFOLD2NA.out.versions) ch_report_input = ch_report_input .mix( - ROSETTAFOLD2NA.out.pdb.map { meta, pdb -> [meta, [pdb]] } + ROSETTAFOLD2NA.out.pdb .join(ROSETTAFOLD2NA.out.msa) .join(ROSETTAFOLD2NA.out.pae) ) diff --git a/workflows/esmfold.nf b/workflows/esmfold.nf index c6af5f1ba..834063c52 100644 --- a/workflows/esmfold.nf +++ b/workflows/esmfold.nf @@ -56,7 +56,7 @@ workflow ESMFOLD { ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions) } - modeChannel(RUN_ESMFOLD.out.pdb, "esmfold").set { ch_pdb_final } + modeChannel(RUN_ESMFOLD.out.pdb, "esmfold", true).set { ch_pdb_final } emit: pdb = ch_pdb_final // channel: [ id, /path/to/*.pdb ] diff --git a/workflows/rosettafold2na.nf b/workflows/rosettafold2na.nf index 5d20cc99a..0ffcaea4a 100644 --- a/workflows/rosettafold2na.nf +++ b/workflows/rosettafold2na.nf @@ -46,7 +46,7 @@ workflow ROSETTAFOLD2NA { ) ch_versions = ch_versions.mix(RUN_ROSETTAFOLD2NA.out.versions) - modeChannel(RUN_ROSETTAFOLD2NA.out.pdb, "rosettafold2na").set { ch_pdb_final } + modeChannel(RUN_ROSETTAFOLD2NA.out.pdb, "rosettafold2na", true).set { ch_pdb_final } modeChannel(RUN_ROSETTAFOLD2NA.out.pae, "rosettafold2na").set { ch_pae_final } modeChannel(RUN_ROSETTAFOLD2NA.out.msa, "rosettafold2na").set { ch_msa_final } diff --git a/workflows/rosettafold_all_atom.nf b/workflows/rosettafold_all_atom.nf index 0d31fa49c..7c202fead 100644 --- a/workflows/rosettafold_all_atom.nf +++ b/workflows/rosettafold_all_atom.nf @@ -63,7 +63,7 @@ workflow ROSETTAFOLD_ALL_ATOM { ) ch_versions = ch_versions.mix(RUN_ROSETTAFOLD_ALL_ATOM.out.versions) - modeChannel(RUN_ROSETTAFOLD_ALL_ATOM.out.pdb, "rosettafold_all_atom").set { ch_pdb_final } + modeChannel(RUN_ROSETTAFOLD_ALL_ATOM.out.pdb, "rosettafold_all_atom", true).set { ch_pdb_final } modeChannel(RUN_ROSETTAFOLD_ALL_ATOM.out.msa, "rosettafold_all_atom").set { ch_msa_final } modeChannel(RUN_ROSETTAFOLD_ALL_ATOM.out.pae, "rosettafold_all_atom").set { ch_pae_final } From aaf80a867ddde22f1e354ab35b5bda28ff8fc6cc Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Tue, 31 Mar 2026 16:43:19 +1100 Subject: [PATCH 14/43] newline replacement might be causing mis-rendered PDB structure --- assets/report_template.html | 2 +- bin/generate_report.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/report_template.html b/assets/report_template.html index e94d8369e..e57165506 100644 --- a/assets/report_template.html +++ b/assets/report_template.html @@ -11,7 +11,7 @@ integrity="sha512-yocoLferfPbcwpCMr8v/B0AB4SWpJlouBwgE0D3ZHaiP1nuu5djZclFEIj9znuqghaZ3tdCMRrreLoM8km+jIQ==" crossorigin="anonymous" > - +