diff --git a/lib/galaxy/tool_util/deps/mulled/mulled_build.py b/lib/galaxy/tool_util/deps/mulled/mulled_build.py index 67008512a26d..4ee5ff29a18e 100644 --- a/lib/galaxy/tool_util/deps/mulled/mulled_build.py +++ b/lib/galaxy/tool_util/deps/mulled/mulled_build.py @@ -30,6 +30,7 @@ ) import yaml +from requests import Session from typing_extensions import Literal from galaxy.tool_util.deps import installable @@ -57,6 +58,8 @@ get_files_from_conda_package, PrintProgress, quay_repository, + quay_tag_exists, + QuayApiException, v1_image_name, v2_image_name, ) @@ -206,6 +209,18 @@ class BuildExistsException(Exception): """ +def _repo_data_contains_tag(repo_data: Dict[str, Any], target_tag: str) -> bool: + if "error_type" in repo_data and repo_data["error_type"] in {"invalid_token", "not_found"}: + return False + + tags = repo_data.get("tags", {}) + if isinstance(tags, dict): + return target_tag in tags + if isinstance(tags, list): + return target_tag in tags + raise QuayApiException(f"Unexpected response from quay.io - no tags description found [{repo_data}]") + + def mull_targets( targets: List[CondaTarget], involucro_context: Optional["InvolucroContext"] = None, @@ -232,6 +247,7 @@ def mull_targets( determine_base_image: bool = True, invfile: str = INVFILE, strict_channel_priority: bool = True, + session: Optional[Session] = None, ) -> int: if involucro_context is None: involucro_context = InvolucroContext() @@ -250,21 +266,37 @@ def mull_targets( if not rebuild or "push" in command: repo_name = repo_template_kwds["image"].split(":", 1)[0] - repo_data = quay_repository(repo_template_kwds["namespace"], repo_name) + repo_data = None if not rebuild: - tags = repo_data.get("tags", []) - target_tag = None if ":" in repo_template_kwds["image"]: image_name_parts = repo_template_kwds["image"].split(":") assert len(image_name_parts) == 2, f": not allowed in image name [{repo_template_kwds['image']}]" target_tag = image_name_parts[1] - - if tags and (target_tag is None or target_tag in tags): - raise BuildExistsException() - if "push" in command and "error_type" in repo_data and oauth_token: - # Explicitly create the repository so it can be built as public. - create_repository(repo_template_kwds["namespace"], repo_name, oauth_token) + if target_tag is not None: + tag_exists = quay_tag_exists(repo_template_kwds["namespace"], repo_name, target_tag, session=session) + if tag_exists is None: + log.warning( + "Falling back to quay repository metadata for %s/%s:%s after registry manifest probe was inconclusive", + repo_template_kwds["namespace"], + repo_name, + target_tag, + ) + repo_data = quay_repository(repo_template_kwds["namespace"], repo_name, session=session) + tag_exists = _repo_data_contains_tag(repo_data, target_tag) + if tag_exists: + raise BuildExistsException() + else: + repo_data = quay_repository(repo_template_kwds["namespace"], repo_name, session=session) + tags = repo_data.get("tags", []) + if tags: + raise BuildExistsException() + if "push" in command: + if repo_data is None: + repo_data = quay_repository(repo_template_kwds["namespace"], repo_name, session=session) + if "error_type" in repo_data and oauth_token: + # Explicitly create the repository so it can be built as public. + create_repository(repo_template_kwds["namespace"], repo_name, oauth_token) for channel in channels: if channel.startswith("file://"): diff --git a/lib/galaxy/tool_util/deps/mulled/mulled_build_channel.py b/lib/galaxy/tool_util/deps/mulled/mulled_build_channel.py index 4656a6fe0318..252f098de2fb 100644 --- a/lib/galaxy/tool_util/deps/mulled/mulled_build_channel.py +++ b/lib/galaxy/tool_util/deps/mulled/mulled_build_channel.py @@ -82,7 +82,7 @@ def run_channel(args, build_last_n_versions: int = 1) -> None: for tag in versions: target = build_target(pkg_name, tag=tag) targets = [target] - mull_targets(targets, test=pkg_tests, **args_to_mull_targets_kwds(args)) + mull_targets(targets, test=pkg_tests, session=session, **args_to_mull_targets_kwds(args)) def get_pkg_names(args): diff --git a/lib/galaxy/tool_util/deps/mulled/mulled_build_files.py b/lib/galaxy/tool_util/deps/mulled/mulled_build_files.py index 27bc2ba0e227..807dc3c8afd0 100644 --- a/lib/galaxy/tool_util/deps/mulled/mulled_build_files.py +++ b/lib/galaxy/tool_util/deps/mulled/mulled_build_files.py @@ -24,6 +24,7 @@ ) from galaxy.tool_util.deps.conda_util import CondaTarget +from galaxy.util import requests from ._cli import arg_parser from .mulled_build import ( add_build_arguments, @@ -57,6 +58,7 @@ def main(argv=None): help="Path to directory (or single file) of TSV files describing composite recipes.", ) args = parser.parse_args() + session = requests.session() for target in generate_targets(args.files): try: ret = mull_targets( @@ -65,6 +67,7 @@ def main(argv=None): name_override=target.name_override, base_image=target.base_image, determine_base_image=False, + session=session, **args_to_mull_targets_kwds(args), ) except BuildExistsException: diff --git a/lib/galaxy/tool_util/deps/mulled/util.py b/lib/galaxy/tool_util/deps/mulled/util.py index 6ad094a8ce50..e5a5debb9e7e 100644 --- a/lib/galaxy/tool_util/deps/mulled/util.py +++ b/lib/galaxy/tool_util/deps/mulled/util.py @@ -23,6 +23,8 @@ from conda_package_streaming.url import stream_conda_info as stream_conda_info_from_url from packaging.version import Version from requests import Session +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry from galaxy.tool_util.deps.conda_util import ( CondaContext, @@ -41,12 +43,27 @@ log = logging.getLogger(__name__) QUAY_REPOSITORY_API_ENDPOINT = "https://quay.io/api/v1/repository" +QUAY_REGISTRY_API_ENDPOINT = "https://quay.io/v2" BUILD_NUMBER_REGEX = re.compile(r"\d+$") MULLED_SOCKET_TIMEOUT = 12 QUAY_VERSIONS_CACHE_EXPIRY = 300 +QUAY_REQUEST_RETRY_STATUS_CODES = (408, 425, 429, 500, 502, 503, 504) +QUAY_REQUEST_MAX_RETRIES = 5 +QUAY_REQUEST_BACKOFF_FACTOR = 1 +QUAY_MANIFEST_ACCEPT = ",".join( + [ + "application/vnd.docker.distribution.manifest.v2+json", + "application/vnd.docker.distribution.manifest.list.v2+json", + "application/vnd.oci.image.manifest.v1+json", + "application/vnd.oci.image.index.v1+json", + "application/vnd.docker.distribution.manifest.v1+json", + ] +) NAMESPACE_HAS_REPO_NAME_KEY = "galaxy.tool_util.deps.container_resolvers.mulled.util:namespace_repo_names" TAG_CACHE_KEY = "galaxy.tool_util.deps.container_resolvers.mulled.util:tag_cache" CONDA_IMAGE = os.environ.get("CONDA_IMAGE", "quay.io/condaforge/miniforge3:latest") +_quay_session_lock = threading.Lock() +_shared_quay_session: Optional[Session] = None class PARSED_TAG(NamedTuple): @@ -56,6 +73,10 @@ class PARSED_TAG(NamedTuple): build_number: int +class QuayApiException(Exception): + """Raised when quay.io returns an unexpected response.""" + + def default_mulled_conda_channels_from_env() -> Optional[List[str]]: if "DEFAULT_MULLED_CONDA_CHANNELS" in os.environ: return os.environ["DEFAULT_MULLED_CONDA_CHANNELS"].split(",") @@ -105,15 +126,68 @@ def create_repository(namespace: str, repo_name: str, oauth_token: str) -> None: requests.post("https://quay.io/api/v1/repository", json=data, headers=headers, timeout=MULLED_SOCKET_TIMEOUT) +def _build_quay_session() -> Session: + retry_strategy = Retry( + total=QUAY_REQUEST_MAX_RETRIES, + connect=QUAY_REQUEST_MAX_RETRIES, + read=QUAY_REQUEST_MAX_RETRIES, + status=QUAY_REQUEST_MAX_RETRIES, + backoff_factor=QUAY_REQUEST_BACKOFF_FACTOR, + status_forcelist=QUAY_REQUEST_RETRY_STATUS_CODES, + allowed_methods=frozenset({"GET", "HEAD"}), + respect_retry_after_header=True, + raise_on_status=False, + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + session = requests.session() + session.mount("https://", adapter) + session.mount("http://", adapter) + return session + + +def _get_quay_session(session: Optional[Session] = None) -> Session: + if session is not None: + return session + + global _shared_quay_session + if _shared_quay_session is None: + with _quay_session_lock: + if _shared_quay_session is None: + _shared_quay_session = _build_quay_session() + return _shared_quay_session + + +def _quay_api_error(response, url: str) -> QuayApiException: + try: + detail = response.json() + except ValueError: + detail = response.text[:200] + return QuayApiException(f"Unexpected quay.io response for {url} [{response.status_code}]: {detail!r}") + + +def _quay_json_dict(response, url: str) -> Dict[str, Any]: + try: + data = response.json() + except ValueError as exc: + raise QuayApiException( + f"Failed to decode quay.io JSON response for {url} [{response.status_code}]: {response.text[:200]!r}" + ) from exc + + if not isinstance(data, dict): + raise QuayApiException(f"Unexpected quay.io response type for {url}: {type(data).__name__}") + + return data + + def quay_versions(namespace: str, pkg_name: str, session: Optional[Session] = None) -> List[str]: """Get all version tags for a Docker image stored on quay.io for supplied package name.""" data = quay_repository(namespace, pkg_name, session=session) - if "error_type" in data and data["error_type"] == "invalid_token": + if "error_type" in data and data["error_type"] in {"invalid_token", "not_found"}: return [] if "tags" not in data: - raise Exception(f"Unexpected response from quay.io - no tags description found [{data}]") + raise QuayApiException(f"Unexpected response from quay.io - no tags description found [{data}]") return [tag for tag in data["tags"].keys() if tag != "latest"] @@ -122,11 +196,51 @@ def quay_repository(namespace: str, pkg_name: str, session: Optional[Session] = assert namespace is not None assert pkg_name is not None url = f"https://quay.io/api/v1/repository/{namespace}/{pkg_name}" - if not session: - session = requests.session() - response = session.get(url, timeout=MULLED_SOCKET_TIMEOUT) - data = response.json() - return data + response = _get_quay_session(session).get(url, timeout=MULLED_SOCKET_TIMEOUT) + if response.status_code in {401, 404}: + try: + data = _quay_json_dict(response, url) + except QuayApiException: + if response.status_code == 404: + # Some missing-repo responses are non-JSON; normalize them to not_found. + return {"error_type": "not_found"} + raise + if response.status_code == 401 and data.get("error_type") != "invalid_token": + raise _quay_api_error(response, url) + # Quay uses 401 invalid_token for some public repo/tag misses. + return data + if response.status_code >= 400: + raise _quay_api_error(response, url) + return _quay_json_dict(response, url) + + +def quay_tag_exists(namespace: str, pkg_name: str, tag: str, session: Optional[Session] = None) -> Optional[bool]: + """Probe the quay registry manifest endpoint for a single tag. + + Returns ``True`` or ``False`` when the manifest ``HEAD`` probe yields a + definitive answer. Returns ``None`` when callers should fall back to + repository metadata. + """ + assert namespace is not None + assert pkg_name is not None + assert tag is not None + + url = f"{QUAY_REGISTRY_API_ENDPOINT}/{namespace}/{pkg_name}/manifests/{tag}" + response = _get_quay_session(session).head( + url, + headers={"Accept": QUAY_MANIFEST_ACCEPT}, + timeout=MULLED_SOCKET_TIMEOUT, + ) + if response.status_code == 404: + # A manifest HEAD 404 is the normal "tag does not exist" case. + return False + if response.status_code == 200: + return True + # Quay can return 401 invalid_token here for public repos, so callers may + # need to fall back to repository metadata to disambiguate the result. + if response.status_code == 401 or response.status_code in QUAY_REQUEST_RETRY_STATUS_CODES: + return None + raise _quay_api_error(response, url) def _get_namespace(namespace: str) -> List[str]: @@ -136,10 +250,12 @@ def _get_namespace(namespace: str) -> List[str]: repos_headers = {"Accept-encoding": "gzip", "Accept": "application/json"} while True: repos_parameters = {"public": "true", "namespace": namespace, "next_page": next_page} - repos_response = requests.get( + repos_response = _get_quay_session().get( QUAY_REPOSITORY_API_ENDPOINT, headers=repos_headers, params=repos_parameters, timeout=MULLED_SOCKET_TIMEOUT ) - repos_response_json = repos_response.json() + if repos_response.status_code >= 400: + raise _quay_api_error(repos_response, QUAY_REPOSITORY_API_ENDPOINT) + repos_response_json = _quay_json_dict(repos_response, QUAY_REPOSITORY_API_ENDPOINT) repos = repos_response_json["repositories"] repo_names += [r["name"] for r in repos] next_page = repos_response_json.get("next_page") @@ -483,6 +599,7 @@ def __exit__(self, exc_type, exc_val, exc_tb) -> None: "get_files_from_conda_package", "image_name", "mulled_tags_for", + "quay_tag_exists", "quay_versions", "split_container_name", "split_tag", diff --git a/test/unit/tool_util/mulled/test_mulled_util.py b/test/unit/tool_util/mulled/test_mulled_util.py index 9aa9c81fda43..8af995339cef 100644 --- a/test/unit/tool_util/mulled/test_mulled_util.py +++ b/test/unit/tool_util/mulled/test_mulled_util.py @@ -1,6 +1,16 @@ import pytest +import requests +import responses -from galaxy.tool_util.deps.mulled.util import version_sorted +from galaxy.tool_util.deps.mulled.util import ( + quay_repository, + quay_tag_exists, + QuayApiException, + version_sorted, +) + +MANIFEST_URL = "https://quay.io/v2/biocontainers/samtools/manifests/1.17--0" +REPOSITORY_URL = "https://quay.io/api/v1/repository/biocontainers/samtools" @pytest.mark.parametrize( @@ -17,3 +27,50 @@ ) def test_version_sorted(tags, tag): assert version_sorted(tags)[0] == tag + + +@responses.activate +def test_quay_tag_exists_uses_registry_head(): + session = requests.Session() + responses.add(responses.HEAD, MANIFEST_URL, status=200) + + assert quay_tag_exists("biocontainers", "samtools", "1.17--0", session=session) is True + assert len(responses.calls) == 1 + assert responses.calls[0].request.url == MANIFEST_URL + assert responses.calls[0].request.method == "HEAD" + + +@responses.activate +def test_quay_tag_exists_returns_false_for_missing_tag(): + session = requests.Session() + responses.add(responses.HEAD, MANIFEST_URL, status=404) + + assert quay_tag_exists("biocontainers", "samtools", "1.17--0", session=session) is False + + +@responses.activate +def test_quay_tag_exists_returns_none_for_transient_failures(): + session = requests.Session() + responses.add(responses.HEAD, MANIFEST_URL, status=502) + + assert quay_tag_exists("biocontainers", "samtools", "1.17--0", session=session) is None + assert len(responses.calls) == 1 + assert responses.calls[0].request.method == "HEAD" + + +@responses.activate +def test_quay_tag_exists_does_not_fall_back_for_non_transient_errors(): + session = requests.Session() + responses.add(responses.HEAD, MANIFEST_URL, status=403) + + with pytest.raises(QuayApiException): + quay_tag_exists("biocontainers", "samtools", "1.17--0", session=session) + assert len(responses.calls) == 1 + + +@responses.activate +def test_quay_repository_returns_invalid_token_response_for_401(): + session = requests.Session() + responses.add(responses.GET, REPOSITORY_URL, json={"error_type": "invalid_token"}, status=401) + + assert quay_repository("biocontainers", "samtools", session=session) == {"error_type": "invalid_token"}