Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
sudo apt-get install -y openmpi-bin libopenmpi3 libopenmpi-dev

- name: install haddock3 with extra dependencies
run: pip install -e '.[mpi,dev,docs,notebooks]'
run: pip install -e '.[mpi,dev,docs,notebooks,deeprank-gnn-esm]'

- name: run unit tests
run: >-
Expand Down Expand Up @@ -110,7 +110,9 @@ jobs:
brew install open-mpi

- name: install haddock3 with extra dependencies
run: pip install -e '.[mpi,dev,docs,notebooks]'
run: |
pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
pip install -e '.[mpi,dev,docs,notebooks,deeprank-gnn-esm]'

- name: run unit tests
run: >-
Expand Down
14 changes: 14 additions & 0 deletions integration_tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,20 @@
MPI_ENABLED = False

has_mpi = pytest.mark.skipif(not MPI_ENABLED, reason="MPI is not enabled")

import platform as _platform

# Probe for the optional deeprank dependency once, at import time, so tests
# that need it are skipped rather than failing when it cannot be imported.
try:
    import deeprank_gnn.predict  # noqa: F401
except ImportError:  # ModuleNotFoundError is a subclass, so it is covered too
    DEEPRANK_ENABLED = False
else:
    DEEPRANK_ENABLED = True

# Marker for tests that can only run when `deeprank_gnn` imports cleanly.
has_deeprank = pytest.mark.skipif(
    not DEEPRANK_ENABLED,
    reason="deeprank_gnn is not installed or not supported on this platform",
)
has_grid = pytest.mark.skipif(not ping_dirac(), reason="Dirac not reachable")
is_linux_x86_64 = pytest.mark.skipif(
platform.system().lower() != "linux" or platform.machine().lower() != "x86_64",
Expand Down
67 changes: 67 additions & 0 deletions integration_tests/test_deeprank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Integration test for the deeprank scoring module."""

import shutil
import tempfile
from pathlib import Path

import pytest

from haddock.libs.libontology import PDBFile
from haddock.modules.scoring.deeprank import DEFAULT_CONFIG as DEEPRANK_CONF
from haddock.modules.scoring.deeprank import HaddockModule as DeeprankModule

from integration_tests import GOLDEN_DATA, has_deeprank


class MockPreviousIO:
    """Stand-in for a previous module's IO that serves two golden complexes."""

    def __init__(self, path):
        # Directory the golden PDB files are copied into.
        self.path = path

    def retrieve_models(self, individualize=False):
        """Copy the golden complexes into `self.path` and wrap them as `PDBFile`s.

        `individualize` is accepted but not used by this mock.
        """
        staged = []
        for name in ("protprot_complex_1.pdb", "protprot_complex_2.pdb"):
            source = GOLDEN_DATA / name
            target = Path(self.path, source.name)
            shutil.copy(source, target)
            staged.append(PDBFile(file_name=str(target), path=self.path))
        return staged


@pytest.fixture
def deeprank_module():
    """Yield a deeprank module rooted in a throwaway directory."""
    with tempfile.TemporaryDirectory() as workdir:
        instance = DeeprankModule(
            order=0,
            path=Path(workdir),
            init_params=DEEPRANK_CONF,
        )
        # Single core, scoring the interface between chains A and B.
        overrides = (("ncores", 1), ("chain_i", "A"), ("chain_j", "B"))
        for key, value in overrides:
            instance.params[key] = value
        yield instance


@has_deeprank
def test_deeprank_run(deeprank_module, mocker):
    """Run the module on the golden models and check each gets a float score."""
    # Exporting the IO models is stubbed out; only the scoring is under test.
    mocker.patch(
        "haddock.modules.BaseHaddockModule.export_io_models",
        return_value=None,
    )
    deeprank_module.previous_io = MockPreviousIO(path=deeprank_module.path)

    deeprank_module.run()

    scored = deeprank_module.output_models
    assert len(scored) == 2
    for model in scored:
        assert model.score is not None
        assert isinstance(model.score, float)

    # FIXME: Make sure the results are consistent
    # assert scored[0].score == pytest.approx(0.119)
    # assert scored[1].score == pytest.approx(0.081)
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ mpi = ["mpi4py>=4.0.2"]

notebooks = ["py3Dmol>=2.5.2"]

deeprank-gnn-esm = ["deeprank-gnn-esm>=1.1.0"]

[project.urls]
Homepage = "https://github.com/haddocking/haddock3"
Documentation = "https://github.com/haddocking/haddock3#readme"
Expand Down
68 changes: 68 additions & 0 deletions src/haddock/modules/scoring/deeprank/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from pathlib import Path
from haddock.core.typing import FilePath, Any
from haddock.core.defaults import MODULE_DEFAULT_YAML

from haddock.libs.libutil import parse_ncores
from haddock.modules import BaseHaddockModule

from haddock.modules.scoring.deeprank.deeprank import (
DeeprankWraper,
deeprank_is_available,
)

RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)


class HaddockModule(BaseHaddockModule):
    """HADDOCK3 module that scores the incoming models with deeprank."""

    name = RECIPE_PATH.name

    def __init__(
        self,
        order: int,
        path: Path,
        *ignore: Any,
        init_params: FilePath = DEFAULT_CONFIG,
        **everything: Any,
    ) -> None:
        """Initialise the module; extra positional/keyword arguments are ignored."""
        super().__init__(order, path, init_params)

    @classmethod
    def confirm_installation(cls) -> None:
        """Raise if deeprank is reported as unavailable."""
        if deeprank_is_available():
            return

        raise Exception(
            "You are trying to use the `deeprank` module but it is not available, please check the installation instructions"
        )

    def _run(self) -> None:
        """Score the previous step's models with deeprank and pass them on."""
        # TODO: Check you need to add some extra options to the `retrieve_models` method
        incoming = self.previous_io.retrieve_models()
        pdb_paths = [Path(model.file_name) for model in incoming]

        # NOTE: deeprank has its own parallelization mechanism, so haddock's
        # engine is NOT used here and execution is left to deeprank. That is
        # why `parse_ncores` has to be called explicitly.
        runner = DeeprankWraper(
            models=pdb_paths,
            ncores=parse_ncores(self.params["ncores"]),
            chain_i=self.params["chain_i"],
            chain_j=self.params["chain_j"],
            path=self.path,
        )
        runner.run()
        scores: dict[str, float] = runner.retrieve_scores()

        # Attach each deeprank score to the model it was computed for; the
        # scores are keyed by the string form of the model path.
        for model, pdb_path in zip(incoming, pdb_paths):
            model.score = scores[str(pdb_path)]

        # Pass the models ahead
        # TODO: Confirm if the type of the exported models is correct
        self.output_models = incoming
        self.export_io_models()
71 changes: 71 additions & 0 deletions src/haddock/modules/scoring/deeprank/deeprank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import csv
import os
import sys
from pathlib import Path


def deeprank_is_available() -> bool:
    """Report whether the optional `deeprank_gnn` package can be imported.

    Returns:
        True when `import deeprank_gnn` succeeds, False when it raises
        `ImportError` (which includes `ModuleNotFoundError`).
    """
    try:
        import deeprank_gnn  # type: ignore  # noqa: F401
    except ImportError:
        # Signal absence through the return value: re-raising would mean this
        # function could never return False, defeating its boolean contract.
        return False
    return True


class DeeprankWraper:
    """Drive `deeprank_gnn.predict` over a set of models and collect its scores.

    deeprank is invoked through its `main` entry point, one model at a time,
    and the per-model CSV it writes is read back from `path`.
    """

    def __init__(self, models, ncores, chain_i, chain_j, path):
        """Store the run configuration.

        Args:
            models: Re-iterable collection of model paths to score; it is
                iterated once by `run` and once by `retrieve_scores`.
            ncores: Core count forwarded to deeprank as a command-line argument.
            chain_i: Chain ID of the first partner.
            chain_j: Chain ID of the second partner.
            path: Directory deeprank is executed in and where its output
                folders are looked up.
        """
        self.models = models
        self.chain_i = chain_i
        self.chain_j = chain_j
        self.ncores = ncores
        self.path = path

    def run(self):
        """Run deeprank on every model, as if calling its `main` from the CLI.

        `sys.argv` and the working directory are process-wide; both are
        replaced for the duration of each deeprank call and restored
        afterwards, even when deeprank raises.
        """
        # The original author notes this import must stay exactly here.
        # Keeping it local also lets this module be imported without deeprank.
        from deeprank_gnn.predict import main as deeprank_main

        for model in self.models:
            # `main` reads its arguments from `sys.argv`, so temporarily
            # substitute the command line it should see for this model.
            saved_argv = sys.argv
            saved_cwd = os.getcwd()
            sys.argv = [
                "deeprank",
                str(model),
                self.chain_i,
                self.chain_j,
                str(self.ncores),
            ]

            try:
                # deeprank writes its output into the directory it is executed
                # from and offers no way to choose another location, so move
                # into `self.path` before triggering it.
                # NOTE(review): a relative `model` path is therefore resolved
                # from `self.path`, not from the caller's directory - confirm
                # callers pass absolute paths or models located in `self.path`.
                os.chdir(self.path)
                deeprank_main()
            finally:
                # Always undo the global changes before moving on, otherwise
                # the rest of the process would keep running with deeprank's
                # arguments and working directory.
                sys.argv = saved_argv
                os.chdir(saved_cwd)

    def _prediction_csv(self, model) -> Path:
        """Return the path of the CSV deeprank is expected to write for `model`."""
        return (
            Path(self.path)
            / f"{Path(model).stem}-gnn_esm_pred_{self.chain_i}_{self.chain_j}"
            / "GNN_esm_prediction.csv"
        )

    def retrieve_scores(self) -> dict[str, float]:
        """Parse the output from deeprank and return the scores.

        Returns:
            Mapping of `str(model)` to the `predicted_fnat` value found in the
            last data row of that model's prediction CSV.

        Raises:
            FileNotFoundError: A model's prediction CSV does not exist.
            KeyError: A CSV row has no `predicted_fnat` column.
            ValueError: A CSV has no data rows, or a value is not a float.
        """
        scores: dict[str, float] = {}
        for model in self.models:
            csv_path = self._prediction_csv(model)
            score = None
            # `newline=""` is what the csv module asks for on files it reads.
            with open(csv_path, newline="") as f:
                for row in csv.DictReader(f):
                    score = float(row["predicted_fnat"])
            if score is None:
                # Fail here with the offending file named, instead of leaving
                # the model out and letting the caller hit a bare KeyError.
                raise ValueError(
                    f"No prediction found in {csv_path} for model {model}"
                )
            scores[str(model)] = score
        return scores
20 changes: 20 additions & 0 deletions src/haddock/modules/scoring/deeprank/defaults.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Default parameters of the deeprank scoring module: the IDs of the two
# partner chains used by deeprank for scoring. Each ID is exactly one
# character long (minchars/maxchars are both 1).
chain_i:
  default: "A"
  type: string
  minchars: 1
  maxchars: 1
  title: First chain ID
  short: Chain ID of the first partner.
  long: Chain ID of the first partner used by deeprank for scoring.
  group: "scoring"
  explevel: easy
chain_j:
  default: "B"
  type: string
  minchars: 1
  maxchars: 1
  title: Second chain ID
  short: Chain ID of the second partner.
  long: Chain ID of the second partner used by deeprank for scoring.
  group: "scoring"
  explevel: easy
22 changes: 20 additions & 2 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from haddock.libs.libgrid import ping_dirac
from haddock.modules import modules_category

import platform as _platform

tests_path = Path(__file__).resolve().parents[0]
data_folder = Path(tests_path, "data")
Expand Down Expand Up @@ -47,7 +47,25 @@

has_grid = pytest.mark.skipif(not ping_dirac(), reason="Dirac not reachable")

_CHROME_BINS = ("google-chrome", "google-chrome-stable", "chromium-browser", "chromium", "chrome")
# Probe for the optional deeprank dependency once, at import time, so tests
# that need it are skipped rather than failing when it cannot be imported.
try:
    import deeprank_gnn.predict  # noqa: F401
except ImportError:  # ModuleNotFoundError is a subclass, so it is covered too
    DEEPRANK_ENABLED = False
else:
    DEEPRANK_ENABLED = True

# Marker for tests that can only run when `deeprank_gnn` imports cleanly.
has_deeprank = pytest.mark.skipif(
    not DEEPRANK_ENABLED,
    reason="deeprank_gnn is not installed or not supported on this platform",
)

# Executable names looked up on PATH (via `shutil.which`) to decide whether a
# Chrome/Chromium binary is present for the `has_chrome` skip marker.
_CHROME_BINS = (
    "google-chrome",
    "google-chrome-stable",
    "chromium-browser",
    "chromium",
    "chrome",
)
has_chrome = pytest.mark.skipif(
not any(shutil.which(b) for b in _CHROME_BINS),
reason="Google Chrome not found (required by Kaleido for PNG export)",
Expand Down
57 changes: 57 additions & 0 deletions tests/test_module_deeprank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Tests for the deeprank scoring module wrapper."""

import sys
import tempfile
import shutil
from unittest.mock import MagicMock, patch
from pathlib import Path

import pytest

from haddock.libs.libontology import PDBFile
from haddock.modules.scoring.deeprank.deeprank import DeeprankWraper

from . import golden_data as GOLDEN_DATA, has_deeprank


@pytest.fixture
def deeprank_wrapper():
    """Yield a wrapper set up for one golden complex staged in a temp directory."""
    with tempfile.TemporaryDirectory() as workdir:
        # Stage a copy of the golden model inside the temporary directory.
        golden_model = GOLDEN_DATA / "protprot_complex_1.pdb"
        staged_model = Path(workdir, golden_model.name)
        shutil.copy(golden_model, staged_model)

        wrapper = DeeprankWraper(
            models=[staged_model],
            ncores=1,
            chain_i="A",
            chain_j="B",
            path=workdir,
        )
        yield wrapper


@has_deeprank
def test_run(deeprank_wrapper):
    """Running the wrapper must leave the prediction CSV in its output folder."""
    deeprank_wrapper.run()

    # deeprank names the output folder after the model stem and the chain pair.
    model = deeprank_wrapper.models[0]
    output_dir = Path(
        deeprank_wrapper.path,
        f"{Path(model).stem}-gnn_esm_pred_{deeprank_wrapper.chain_i}_{deeprank_wrapper.chain_j}",
    )

    assert (output_dir / "GNN_esm_prediction.csv").exists()


@has_deeprank
def test_retrieve_scores(deeprank_wrapper):
    """After a run, every model must map to a float score."""
    deeprank_wrapper.run()
    scores = deeprank_wrapper.retrieve_scores()

    # Scores are keyed by the string form of each model path.
    expected_keys = [str(model) for model in deeprank_wrapper.models]
    assert len(scores) == len(deeprank_wrapper.models)
    for key in expected_keys:
        assert key in scores
        assert isinstance(scores[key], float)