diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2b37ea9291..e4055d30a2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -38,7 +38,7 @@ jobs:
           sudo apt-get install -y openmpi-bin libopenmpi3 libopenmpi-dev
 
       - name: install haddock3 with extra dependencies
-        run: pip install -e '.[mpi,dev,docs,notebooks]'
+        run: pip install -e '.[mpi,dev,docs,notebooks,deeprank-gnn-esm]'
 
       - name: run unit tests
         run: >-
@@ -110,7 +110,9 @@ jobs:
           brew install open-mpi
 
       - name: install haddock3 with extra dependencies
-        run: pip install -e '.[mpi,dev,docs,notebooks]'
+        run: |
+          pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install -e '.[mpi,dev,docs,notebooks,deeprank-gnn-esm]'
 
       - name: run unit tests
         run: >-
diff --git a/integration_tests/__init__.py b/integration_tests/__init__.py
index 8e1f4c1b54..e203b47878 100644
--- a/integration_tests/__init__.py
+++ b/integration_tests/__init__.py
@@ -30,6 +30,18 @@
 MPI_ENABLED = False
 
 has_mpi = pytest.mark.skipif(not MPI_ENABLED, reason="MPI is not enabled")
+
+try:
+    import deeprank_gnn.predict  # noqa: F401
+
+    DEEPRANK_ENABLED = True
+except (ImportError, ModuleNotFoundError):
+    DEEPRANK_ENABLED = False
+
+has_deeprank = pytest.mark.skipif(
+    not DEEPRANK_ENABLED,
+    reason="deeprank_gnn is not installed or not supported on this platform",
+)
 has_grid = pytest.mark.skipif(not ping_dirac(), reason="Dirac not reachable")
 is_linux_x86_64 = pytest.mark.skipif(
     platform.system().lower() != "linux" or platform.machine().lower() != "x86_64",
diff --git a/integration_tests/test_deeprank.py b/integration_tests/test_deeprank.py
new file mode 100644
index 0000000000..c69bbb5576
--- /dev/null
+++ b/integration_tests/test_deeprank.py
@@ -0,0 +1,67 @@
+"""Integration test for the deeprank scoring module."""
+
+import shutil
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from haddock.libs.libontology import PDBFile
+from haddock.modules.scoring.deeprank import DEFAULT_CONFIG as DEEPRANK_CONF
+from haddock.modules.scoring.deeprank import HaddockModule as DeeprankModule
+
+from integration_tests import GOLDEN_DATA, has_deeprank
+
+
+class MockPreviousIO:
+    def __init__(self, path):
+        self.path = path
+
+    def retrieve_models(self, individualize=False):
+        target_models = ["protprot_complex_1.pdb", "protprot_complex_2.pdb"]
+        model_list = []
+        for pdb in target_models:
+            src = GOLDEN_DATA / pdb
+            dst = Path(self.path, src.name)
+            shutil.copy(src, dst)
+            model_list.append(PDBFile(file_name=str(dst), path=self.path))
+
+        return model_list
+
+
+@pytest.fixture
+def deeprank_module():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        module = DeeprankModule(
+            order=0,
+            path=Path(tmpdir),
+            init_params=DEEPRANK_CONF,
+        )
+        module.params["ncores"] = 1
+        module.params["chain_i"] = "A"
+        module.params["chain_j"] = "B"
+        yield module
+
+
+@has_deeprank
+def test_deeprank_run(deeprank_module, mocker):
+    deeprank_module.previous_io = MockPreviousIO(path=deeprank_module.path)
+    mocker.patch(
+        "haddock.modules.BaseHaddockModule.export_io_models",
+        return_value=None,
+    )
+
+    deeprank_module.run()
+
+    assert len(deeprank_module.output_models) == 2
+    model1 = deeprank_module.output_models[0]
+    assert model1.score is not None
+    assert isinstance(model1.score, float)
+
+    model2 = deeprank_module.output_models[1]
+    assert model2.score is not None
+    assert isinstance(model2.score, float)
+
+    # FIXME: Make sure the results are consistent
+    # assert model1.score == pytest.approx(0.119)
+    # assert model2.score == pytest.approx(0.081)
diff --git a/pyproject.toml b/pyproject.toml
index 2ed1e567b4..4a204083fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,6 +74,8 @@
 mpi = ["mpi4py>=4.0.2"]
 notebooks = ["py3Dmol>=2.5.2"]
 
+deeprank-gnn-esm = ["deeprank-gnn-esm>=1.1.0"]
+
 [project.urls]
 Homepage = "https://github.com/haddocking/haddock3"
 Documentation = "https://github.com/haddocking/haddock3#readme"
diff --git a/src/haddock/modules/scoring/deeprank/__init__.py b/src/haddock/modules/scoring/deeprank/__init__.py
new file mode 100644
index 0000000000..1a148ed75e
--- /dev/null
+++ b/src/haddock/modules/scoring/deeprank/__init__.py
@@ -0,0 +1,68 @@
+from pathlib import Path
+from haddock.core.typing import FilePath, Any
+from haddock.core.defaults import MODULE_DEFAULT_YAML
+
+from haddock.libs.libutil import parse_ncores
+from haddock.modules import BaseHaddockModule
+
+from haddock.modules.scoring.deeprank.deeprank import (
+    DeeprankWrapper,
+    deeprank_is_available,
+)
+
+RECIPE_PATH = Path(__file__).resolve().parent
+DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)
+
+
+class HaddockModule(BaseHaddockModule):
+    name = RECIPE_PATH.name
+
+    def __init__(
+        self,
+        order: int,
+        path: Path,
+        *ignore: Any,
+        init_params: FilePath = DEFAULT_CONFIG,
+        **everything: Any,
+    ) -> None:
+        super().__init__(order, path, init_params)
+
+    @classmethod
+    def confirm_installation(cls) -> None:
+        """Confirm if the module is ready to use."""
+
+        if not deeprank_is_available():
+            raise RuntimeError(
+                "You are trying to use the `deeprank` module but it is not available, please check the installation instructions"
+            )
+
+        return
+
+    def _run(self) -> None:
+        # TODO: Check you need to add some extra options to the `retrieve_models` method
+        models_to_use = self.previous_io.retrieve_models()
+        model_paths = [Path(m.file_name) for m in models_to_use]
+
+        # NOTE: deeprank has its own logic of parallelization mechanism
+        # so here we DO NOT use haddock's engine and we let deeprank handle the execution.
+        # Because of that we need `parse_ncores` explicitly
+        ncores = parse_ncores(self.params["ncores"])
+        deeprank_wrapper = DeeprankWrapper(
+            models=model_paths,
+            ncores=ncores,
+            chain_i=self.params["chain_i"],
+            chain_j=self.params["chain_j"],
+            path=self.path,
+        )
+
+        deeprank_wrapper.run()
+        result_dic: dict[str, float] = deeprank_wrapper.retrieve_scores()
+
+        # Add the score obtained by deeprank back to the models
+        for model, model_path in zip(models_to_use, model_paths):
+            model.score = result_dic[str(model_path)]
+
+        # Pass the models ahead
+        # TODO: Confirm if the type of `models_to_use` is correct
+        self.output_models = models_to_use
+        self.export_io_models()
diff --git a/src/haddock/modules/scoring/deeprank/deeprank.py b/src/haddock/modules/scoring/deeprank/deeprank.py
new file mode 100644
index 0000000000..e02cab40b2
--- /dev/null
+++ b/src/haddock/modules/scoring/deeprank/deeprank.py
@@ -0,0 +1,76 @@
+import csv
+import os
+import sys
+from pathlib import Path
+
+
+def deeprank_is_available() -> bool:
+    """Return True when the optional `deeprank_gnn` package can be imported."""
+    try:
+        import deeprank_gnn  # type: ignore
+    except ImportError:
+        return False
+    return True
+
+
+class DeeprankWrapper:
+    def __init__(self, models, ncores, chain_i, chain_j, path):
+        self.models = models
+        self.chain_i = chain_i
+        self.chain_j = chain_j
+        self.ncores = ncores
+        self.path = path
+
+    def run(self):
+        """Run method for the wrapper, it will call deeprank as if we were using the `main` function."""
+
+        # This import needs to be exactly here
+        from deeprank_gnn.predict import main as deeprank_main
+
+        for model in self.models:
+            # NOTE: Since we are using the `main` function that takes `sys.argv`
+            # we need a hacky solution to override it. Here we can simply re-write
+            # it and pass the arguments we need
+            original_argv = sys.argv
+            original_cwd = os.getcwd()
+            sys.argv = [
+                "deeprank",
+                str(model),
+                self.chain_i,
+                self.chain_j,
+                str(self.ncores),
+            ]
+
+            try:
+                # NOTE: deeprank will write its output to the path it is being executed
+                # from, there is no way to define where the output will be saved, so
+                # here we need to move into `self.path` to trigger the function
+                os.chdir(self.path)
+                deeprank_main()
+            finally:
+                # NOTE: !!! VERY IMPORTANT !!!
+                # Since we moved directories and overrode the `sys.argv` we NEED to have
+                # this `finally` here - it means this branch of the code will always be
+                # executed. With this we can hopefully guarantee we go back to where we
+                # should be before the execution moves on
+                sys.argv = original_argv
+                os.chdir(original_cwd)
+
+    def retrieve_scores(self) -> dict[str, float]:
+        """Parse the output from deeprank and return the scores."""
+        scores = {}
+        for model in self.models:
+            csv_path = (
+                Path(self.path)
+                / f"{Path(model).stem}-gnn_esm_pred_{self.chain_i}_{self.chain_j}"
+                / "GNN_esm_prediction.csv"
+            )
+            with open(csv_path) as f:
+                reader = csv.DictReader(f)
+                for row in reader:
+                    scores[str(model)] = float(row["predicted_fnat"])
+        return scores
+
+
+# Backward-compatible alias for the original (misspelled) class name
+DeeprankWraper = DeeprankWrapper
diff --git a/src/haddock/modules/scoring/deeprank/defaults.yaml b/src/haddock/modules/scoring/deeprank/defaults.yaml
new file mode 100644
index 0000000000..066c0f845d
--- /dev/null
+++ b/src/haddock/modules/scoring/deeprank/defaults.yaml
@@ -0,0 +1,20 @@
+chain_i:
+  default: "A"
+  type: string
+  minchars: 1
+  maxchars: 1
+  title: First chain ID
+  short: Chain ID of the first partner.
+  long: Chain ID of the first partner used by deeprank for scoring.
+  group: "scoring"
+  explevel: easy
+chain_j:
+  default: "B"
+  type: string
+  minchars: 1
+  maxchars: 1
+  title: Second chain ID
+  short: Chain ID of the second partner.
+  long: Chain ID of the second partner used by deeprank for scoring.
+  group: "scoring"
+  explevel: easy
diff --git a/tests/__init__.py b/tests/__init__.py
index 4eeddde316..19351fdeb1 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -47,7 +47,25 @@
 has_grid = pytest.mark.skipif(not ping_dirac(), reason="Dirac not reachable")
 
-_CHROME_BINS = ("google-chrome", "google-chrome-stable", "chromium-browser", "chromium", "chrome")
+try:
+    import deeprank_gnn.predict  # noqa: F401
+
+    DEEPRANK_ENABLED = True
+except (ImportError, ModuleNotFoundError):
+    DEEPRANK_ENABLED = False
+
+has_deeprank = pytest.mark.skipif(
+    not DEEPRANK_ENABLED,
+    reason="deeprank_gnn is not installed or not supported on this platform",
+)
+
+_CHROME_BINS = (
+    "google-chrome",
+    "google-chrome-stable",
+    "chromium-browser",
+    "chromium",
+    "chrome",
+)
 has_chrome = pytest.mark.skipif(
     not any(shutil.which(b) for b in _CHROME_BINS),
     reason="Google Chrome not found (required by Kaleido for PNG export)",
 )
diff --git a/tests/test_module_deeprank.py b/tests/test_module_deeprank.py
new file mode 100644
index 0000000000..ec035d7636
--- /dev/null
+++ b/tests/test_module_deeprank.py
@@ -0,0 +1,54 @@
+"""Tests for the deeprank scoring module wrapper."""
+
+import tempfile
+import shutil
+from pathlib import Path
+
+import pytest
+
+from haddock.modules.scoring.deeprank.deeprank import DeeprankWrapper
+
+from . import golden_data as GOLDEN_DATA, has_deeprank
+
+
+@pytest.fixture
+def deeprank_wrapper():
+    with tempfile.TemporaryDirectory() as temp_dir:
+        src = GOLDEN_DATA / "protprot_complex_1.pdb"
+        dst = Path(temp_dir, src.name)
+        shutil.copy(src, dst)
+        yield DeeprankWrapper(
+            models=[dst],
+            ncores=1,
+            chain_i="A",
+            chain_j="B",
+            path=temp_dir,
+        )
+
+
+@has_deeprank
+def test_run(deeprank_wrapper):
+    """Test the execution method of the wrapper."""
+    deeprank_wrapper.run()
+
+    model = deeprank_wrapper.models[0]
+    expected_csv = (
+        Path(deeprank_wrapper.path)
+        / f"{Path(model).stem}-gnn_esm_pred_{deeprank_wrapper.chain_i}_{deeprank_wrapper.chain_j}"
+        / "GNN_esm_prediction.csv"
+    )
+
+    # Check if the results folders were created
+    assert expected_csv.exists()
+
+
+@has_deeprank
+def test_retrieve_scores(deeprank_wrapper):
+    """Check the method that retrieves the scores."""
+    deeprank_wrapper.run()
+    scores = deeprank_wrapper.retrieve_scores()
+
+    assert len(scores) == len(deeprank_wrapper.models)
+    for model in deeprank_wrapper.models:
+        assert str(model) in scores
+        assert isinstance(scores[str(model)], float)