2 changes: 2 additions & 0 deletions docs/source/metrics.rst
@@ -396,6 +396,8 @@ Complete list of metrics
fairness.SubgroupDifference
fairness.SubgroupMetric
rec_sys.HitRate
rec_sys.MAP
rec_sys.MRR
rec_sys.NDCG

.. note::
4 changes: 4 additions & 0 deletions ignite/metrics/__init__.py
@@ -38,6 +38,8 @@
from ignite.metrics.psnr import PSNR
from ignite.metrics.recall import Recall
from ignite.metrics.rec_sys.hitrate import HitRate
from ignite.metrics.rec_sys.map import MAP
from ignite.metrics.rec_sys.mrr import MRR
from ignite.metrics.rec_sys.ndcg import NDCG
from ignite.metrics.roc_auc import ROC_AUC, RocCurve
from ignite.metrics.root_mean_squared_error import RootMeanSquaredError
@@ -107,5 +109,7 @@
"CommonObjectDetectionMetrics",
"coco_tensor_list_to_dict_list",
"HitRate",
"MAP",
"MRR",
"NDCG",
]
4 changes: 3 additions & 1 deletion ignite/metrics/rec_sys/__init__.py
@@ -1,4 +1,6 @@
from ignite.metrics.rec_sys.hitrate import HitRate
from ignite.metrics.rec_sys.map import MAP
from ignite.metrics.rec_sys.mrr import MRR
from ignite.metrics.rec_sys.ndcg import NDCG

__all__ = ["HitRate", "NDCG"]
__all__ = ["HitRate", "MAP", "MRR", "NDCG"]
169 changes: 169 additions & 0 deletions ignite/metrics/rec_sys/map.py
@@ -0,0 +1,169 @@
from collections.abc import Callable

import torch

from ignite.exceptions import NotComputableError
from ignite.metrics.metric import Metric, reinit__is_reduced, sync_all_reduce

__all__ = ["MAP"]


class MAP(Metric):
r"""Calculates the Mean Average Precision (MAP) at `k` for Recommendation Systems.

MAP measures the mean of Average Precision (AP) across all users. AP for a
single user is the average of precision values computed at every position
where a relevant item appears in the ranked top-k list, divided by the
total number of relevant items for that user (clipped at ``k``).

.. math::
\text{AP}@K_i = \frac{1}{\min(R_i, K)}
\sum_{j=1}^{K} \text{Precision}@j \cdot \mathbb{1}(\text{rel}_{i,j})

.. math::
\text{MAP}@K = \frac{1}{N} \sum_{i=1}^{N} \text{AP}@K_i

where :math:`R_i` is the number of relevant items for user :math:`i`,
:math:`\text{rel}_{i,j}` is 1 if the item at rank :math:`j` is relevant
and 0 otherwise, and :math:`\text{Precision}@j` is the proportion of
relevant items in the top :math:`j` ranked predictions.

- ``update`` must receive output of the form ``(y_pred, y)``.
- ``y_pred`` is expected to be raw logits or probability scores for each item
in the catalog.
- ``y`` is expected to contain binary relevance labels (only 0s and 1s), where
``1`` marks a relevant item.
- ``y_pred`` and ``y`` must both have shape :math:`(batch, num\_items)`.
- ``compute`` returns a list of MAP values ordered by the sorted values of ``top_k``.

Args:
top_k: a single positive integer or a list of positive integers that specifies
``k`` for calculating MAP@top-k. If a single int is provided, it will be
wrapped in a list. Default is 10.
ignore_zero_hits: if True, users with no relevant items (an all-zero
ground-truth row) are ignored when computing MAP. If set to False, such
users are counted with an Average Precision of 0. By default, True.
output_transform: a callable that is used to transform the
:class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into
the form expected by the metric.
The output is expected to be a tuple ``(prediction, target)`` where
``prediction`` and ``target`` are tensors of shape ``(batch, num_items)``.
device: specifies which device updates are accumulated on. Setting the
metric's device to be the same as your ``update`` arguments ensures the
``update`` method is non-blocking. By default, CPU.
skip_unrolling: specifies whether input should be unrolled or not before
being processed. Should be true for multi-output models.

Examples:
To use with ``Engine`` and ``process_function``, simply attach the metric
instance to the engine. The output of the engine's ``process_function``
needs to be in the format of ``(y_pred, y)``. If not, ``output_transform``
can be added to the metric to transform the output into the form expected
by the metric.

For more information on how the metric works with
:class:`~ignite.engine.engine.Engine`, visit :ref:`attach-engine`.

.. include:: defaults.rst
:start-after: :orphan:

.. testcode::

metric = MAP(top_k=[1, 2, 3, 4])
metric.attach(default_evaluator, "map")
y_pred = torch.Tensor([
[4.0, 2.0, 3.0, 1.0],
[1.0, 2.0, 3.0, 4.0],
])
y_true = torch.Tensor([
[0.0, 0.0, 1.0, 1.0],
[0.0, 0.0, 0.0, 1.0],
])
state = default_evaluator.run([(y_pred, y_true)])
print(state.metrics["map"])

.. versionadded:: 0.6.0
"""

required_output_keys = ("y_pred", "y")
_state_dict_all_req_keys = ("_sum_ap_per_k", "_num_examples")

def __init__(
self,
top_k: list[int] | int = 10,
ignore_zero_hits: bool = True,
output_transform: Callable = lambda x: x,
device: str | torch.device = torch.device("cpu"),
skip_unrolling: bool = False,
):
if not isinstance(top_k, (int, list)):
raise ValueError("top_k must be either an int or a list[int].")

top_k = [top_k] if isinstance(top_k, int) else top_k

if len(top_k) == 0:
raise ValueError("top_k must not be empty.")
if any(k <= 0 for k in top_k):
raise ValueError("top_k must contain positive integers only.")

self.top_k = sorted(top_k)
self.ignore_zero_hits = ignore_zero_hits
super().__init__(output_transform, device=device, skip_unrolling=skip_unrolling)

@reinit__is_reduced
def reset(self) -> None:
self._sum_ap_per_k = torch.zeros(len(self.top_k), device=self._device)
self._num_examples = 0

@reinit__is_reduced
def update(self, output: tuple[torch.Tensor, torch.Tensor]) -> None:
if len(output) != 2:
raise ValueError(f"output should be in the form `(y_pred, y)` but got a tuple of {len(output)} tensors.")

y_pred, y = output
if y_pred.shape != y.shape:
raise ValueError(f"y_pred and y must have the same shape, got {y_pred.shape} != {y.shape}.")

if self.ignore_zero_hits:
valid_mask = torch.any(y > 0, dim=-1)
y_pred = y_pred[valid_mask]
y = y[valid_mask]

if y.shape[0] == 0:
return

max_k = self.top_k[-1]
_, indices = torch.topk(y_pred, k=max_k, dim=-1)
ranked_relevance = torch.gather(y, dim=-1, index=indices).to(torch.float32)

# Total number of relevant items per user across the catalog (used as
# the AP denominator, clipped at k below).
total_relevant = (y > 0).to(torch.float32).sum(dim=-1)

for i, k in enumerate(self.top_k):
top_k_relevance = ranked_relevance[:, :k]
# Cumulative number of relevant items at each rank up to k.
cumulative_hits = torch.cumsum(top_k_relevance, dim=-1)
positions = torch.arange(1, k + 1, dtype=torch.float32, device=top_k_relevance.device)
# Precision@j evaluated at every rank j in [1, k].
precision_at_j = cumulative_hits / positions
# Sum precision values only at positions where the item is relevant.
sum_precision = (precision_at_j * top_k_relevance).sum(dim=-1)

denom = torch.clamp(total_relevant, max=float(k))
ap_k = torch.where(
denom > 0,
sum_precision / denom,
torch.zeros_like(sum_precision),
)
self._sum_ap_per_k[i] += ap_k.sum().to(self._device)

self._num_examples += y.shape[0]

@sync_all_reduce("_sum_ap_per_k", "_num_examples")
def compute(self) -> list[float]:
if self._num_examples == 0:
raise NotComputableError("MAP must have at least one example before it can be computed.")

return (self._sum_ap_per_k / self._num_examples).tolist()
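As a sanity check on the AP arithmetic above, the docstring example can be reproduced in plain PyTorch. The `ap_at_k` helper below is an illustrative sketch, not part of the PR, and assumes every user has at least one relevant item:

import torch

y_pred = torch.tensor([[4.0, 2.0, 3.0, 1.0], [1.0, 2.0, 3.0, 4.0]])
y_true = torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 1.0]])

def ap_at_k(scores, labels, k):
    # Rank items by descending score and read off their relevance labels.
    _, idx = torch.topk(scores, k=k, dim=-1)
    rel = torch.gather(labels, -1, idx)
    # Precision@j at every rank j in [1, k].
    prec = torch.cumsum(rel, -1) / torch.arange(1, k + 1, dtype=torch.float32)
    # Sum precision at relevant positions; divide by min(R, k).
    denom = torch.clamp((labels > 0).sum(-1).float(), max=float(k))
    return (prec * rel).sum(-1) / denom

print([round(ap_at_k(y_pred, y_true, k).mean().item(), 3) for k in (1, 2, 3, 4)])
# -> [0.5, 0.625, 0.625, 0.75], matching the testoutput above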
167 changes: 167 additions & 0 deletions ignite/metrics/rec_sys/mrr.py
@@ -0,0 +1,167 @@
from collections.abc import Callable

import torch

from ignite.exceptions import NotComputableError
from ignite.metrics.metric import Metric, reinit__is_reduced, sync_all_reduce

__all__ = ["MRR"]


class MRR(Metric):
r"""Calculates the Mean Reciprocal Rank (MRR) at `k` for Recommendation Systems.

MRR measures the average reciprocal rank of the first relevant item in the
predicted ranking for each user. The reciprocal rank for a user is
:math:`1/\text{rank}` where :math:`\text{rank}` is the position of the first
relevant item in the ranked list (1-indexed). Users for which no relevant
item appears in the top-k results contribute 0 to the score.

.. math:: \text{MRR}@K = \frac{1}{N} \sum_{i=1}^{N} \frac{1}{\text{rank}_i}

where :math:`\text{rank}_i \in \{1, 2, \ldots, K\}` is the rank of the first
relevant item for user :math:`i` in the top-k predictions, and is treated as
:math:`\infty` (yielding a reciprocal rank of 0) if no relevant item is in
the top-k predictions.

- ``update`` must receive output of the form ``(y_pred, y)``.
- ``y_pred`` is expected to be raw logits or probability scores for each item
in the catalog.
- ``y`` is expected to contain binary relevance labels (only 0s and 1s), where
``1`` marks a relevant item.
- ``y_pred`` and ``y`` must both have shape :math:`(batch, num\_items)`.
- ``compute`` returns a list of MRR values ordered by the sorted values of ``top_k``.

Args:
top_k: a single positive integer or a list of positive integers that specifies
``k`` for calculating MRR@top-k. If a single int is provided, it will be
wrapped in a list. Default is 10.
ignore_zero_hits: if True, users with no relevant items (an all-zero
ground-truth row) are ignored when computing MRR. If set to False, such
users are counted with a reciprocal rank of 0. By default, True.
output_transform: a callable that is used to transform the
:class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into
the form expected by the metric.
The output is expected to be a tuple ``(prediction, target)`` where
``prediction`` and ``target`` are tensors of shape ``(batch, num_items)``.
device: specifies which device updates are accumulated on. Setting the
metric's device to be the same as your ``update`` arguments ensures the
``update`` method is non-blocking. By default, CPU.
skip_unrolling: specifies whether input should be unrolled or not before
being processed. Should be true for multi-output models.

Examples:
To use with ``Engine`` and ``process_function``, simply attach the metric
instance to the engine. The output of the engine's ``process_function``
needs to be in the format of ``(y_pred, y)``. If not, ``output_transform``
can be added to the metric to transform the output into the form expected
by the metric.

For more information on how the metric works with
:class:`~ignite.engine.engine.Engine`, visit :ref:`attach-engine`.

.. include:: defaults.rst
:start-after: :orphan:

.. testcode::

metric = MRR(top_k=[1, 2, 3, 4])
metric.attach(default_evaluator, "mrr")
y_pred = torch.Tensor([
[4.0, 2.0, 3.0, 1.0],
[1.0, 2.0, 3.0, 4.0],
])
y_true = torch.Tensor([
[0.0, 0.0, 1.0, 1.0],
[0.0, 0.0, 0.0, 1.0],
])
state = default_evaluator.run([(y_pred, y_true)])
print(state.metrics["mrr"])

.. testoutput::

[0.5, 0.75, 0.75, 0.75]

.. versionadded:: 0.6.0
"""

required_output_keys = ("y_pred", "y")
_state_dict_all_req_keys = ("_sum_rr_per_k", "_num_examples")

def __init__(
self,
top_k: list[int] | int = 10,
ignore_zero_hits: bool = True,
output_transform: Callable = lambda x: x,
device: str | torch.device = torch.device("cpu"),
skip_unrolling: bool = False,
):
if not isinstance(top_k, (int, list)):
raise ValueError("top_k must be either an int or a list[int].")

top_k = [top_k] if isinstance(top_k, int) else top_k

if len(top_k) == 0:
raise ValueError("top_k must not be empty.")
if any(k <= 0 for k in top_k):
raise ValueError("top_k must contain positive integers only.")

self.top_k = sorted(top_k)
self.ignore_zero_hits = ignore_zero_hits
super().__init__(output_transform, device=device, skip_unrolling=skip_unrolling)

@reinit__is_reduced
def reset(self) -> None:
self._sum_rr_per_k = torch.zeros(len(self.top_k), device=self._device)
self._num_examples = 0

@reinit__is_reduced
def update(self, output: tuple[torch.Tensor, torch.Tensor]) -> None:
if len(output) != 2:
raise ValueError(f"output should be in the form `(y_pred, y)` but got a tuple of {len(output)} tensors.")

y_pred, y = output
if y_pred.shape != y.shape:
raise ValueError(f"y_pred and y must have the same shape, got {y_pred.shape} != {y.shape}.")

if self.ignore_zero_hits:
valid_mask = torch.any(y > 0, dim=-1)
y_pred = y_pred[valid_mask]
y = y[valid_mask]

if y.shape[0] == 0:
return

max_k = self.top_k[-1]
# Get top-max_k predictions ordered by descending score.
_, indices = torch.topk(y_pred, k=max_k, dim=-1)
# Gather corresponding relevance labels in ranked order.
ranked_relevance = torch.gather(y, dim=-1, index=indices)

batch_size = y.shape[0]

for i, k in enumerate(self.top_k):
top_k_relevance = ranked_relevance[:, :k]
# First-relevant position per user (1-indexed). Users with no relevant
# item in the top-k get a reciprocal rank of 0 via the mask below.
has_hit = torch.any(top_k_relevance > 0, dim=-1)
# argmax returns the index of the first max; for binary labels that
# is the position of the first 1 when at least one exists.
first_pos = torch.argmax(top_k_relevance.to(torch.long), dim=-1) + 1
reciprocal_rank = torch.where(
has_hit,
1.0 / first_pos.to(torch.float32),
torch.zeros(batch_size, device=top_k_relevance.device),
)
self._sum_rr_per_k[i] += reciprocal_rank.sum().to(self._device)

self._num_examples += y.shape[0]

@sync_all_reduce("_sum_rr_per_k", "_num_examples")
def compute(self) -> list[float]:
if self._num_examples == 0:
raise NotComputableError("MRR must have at least one example before it can be computed.")

return (self._sum_rr_per_k / self._num_examples).tolist()
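Like other ignite metrics, `MRR` can also be driven directly through the base `Metric` API (`reset` / `update` / `compute`) without attaching to an `Engine`. A minimal sketch with made-up scores:

import torch
from ignite.metrics import MRR

metric = MRR(top_k=[1, 3])
metric.reset()
# User 1: relevant item ranked first -> reciprocal rank 1 at every k.
metric.update((torch.tensor([[0.1, 0.9, 0.3]]), torch.tensor([[0.0, 1.0, 0.0]])))
# User 2: relevant item ranked second -> 0 at k=1, 0.5 at k=3.
metric.update((torch.tensor([[0.8, 0.1, 0.2]]), torch.tensor([[0.0, 0.0, 1.0]])))
print(metric.compute())  # [0.5, 0.75] -- MRR@1 and MRR@3 over the two users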