MONAI/monai/inferers/merger.py at fe4e11e9a7059be68c94cbf555fba4ec3e74188a · Project-MONAI/MONAI · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import threading
from abc import ABC, abstractmethod
from collections.abc import Sequence
from contextlib import nullcontext
from tempfile import TemporaryDirectory
from typing import TYPE_CHECKING, Any

import numpy as np
import torch

from monai.utils import (
    deprecated_arg,
    ensure_tuple_size,
    get_package_version,
    optional_import,
    require_pkg,
    version_geq,
)

if TYPE_CHECKING:
    import zarr
else:
    zarr, _ = optional_import("zarr")

__all__ = ["Merger", "AvgMerger", "ZarrAvgMerger"]


class Merger(ABC):
    """
    A base class for merging patches.
    Extend this class to support operations for `PatchInference`.
    There are two methods that must be implemented in the concrete classes:

        - aggregate: aggregate the values at their corresponding locations
        - finalize: perform any final process and return the merged output

    Args:
        merged_shape: the shape of the tensor required to merge the patches.
        cropped_shape: the shape of the final merged output tensor.
            If not provided, it will be the same as `merged_shape`.
        device: the device where Merger tensors should reside.
    """

    def __init__(
        self,
        merged_shape: Sequence[int],
        cropped_shape: Sequence[int] | None = None,
        device: torch.device | str | None = None,
    ) -> None:
        if merged_shape is None:
            raise ValueError("Argument `merged_shape` must be provided")

        self.merged_shape: tuple[int, ...] = tuple(merged_shape)
        self.cropped_shape: tuple[int, ...] = tuple(self.merged_shape if cropped_shape is None else cropped_shape)
        self.device = device
        self.is_finalized = False

    @abstractmethod
    def aggregate(self, values: torch.Tensor, location: Sequence[int]) -> None:
        """
        Aggregate values for merging.
        This method is being called in a loop and should add values to their corresponding location in the merged output results.

        Args:
            values: a tensor of shape BCHW[D], representing the values of inference output.
            location: a tuple/list giving the top left location of the patch in the output.

        Raises:
            NotImplementedError: When the subclass does not override this method.

        """
        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")

    @abstractmethod
    def finalize(self) -> Any:
        """
        Perform final operations for merging patches and return the final merged output.

        Returns:
            The results of merged patches, which is commonly a torch.Tensor representing the merged result, or
                a string representing the filepath to the merged results on disk.

        Raises:
            NotImplementedError: When the subclass does not override this method.

        """
        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")


class AvgMerger(Merger):
    """Merge patches by taking average of the overlapping area

    Args:
        merged_shape: the shape of the tensor required to merge the patches.
        cropped_shape: the shape of the final merged output tensor.
            If not provided, it will be the same as `merged_shape`.
        device: the device for aggregator tensors and final results.
        value_dtype: the dtype for value aggregating tensor and the final result.
        count_dtype: the dtype for sample counting tensor.
    """

    def __init__(
        self,
        merged_shape: Sequence[int],
        cropped_shape: Sequence[int] | None = None,
        value_dtype: torch.dtype = torch.float32,
        count_dtype: torch.dtype = torch.uint8,
        device: torch.device | str = "cpu",
    ) -> None:
        super().__init__(merged_shape=merged_shape, cropped_shape=cropped_shape, device=device)
        if not self.merged_shape:
            raise ValueError(f"`merged_shape` must be provided for `AvgMerger`. {self.merged_shape} is give.")
        self.value_dtype = value_dtype
        self.count_dtype = count_dtype
        self.values = torch.zeros(self.merged_shape, dtype=self.value_dtype, device=self.device)
        self.counts = torch.zeros(self.merged_shape, dtype=self.count_dtype, device=self.device)

    def aggregate(self, values: torch.Tensor, location: Sequence[int]) -> None:
        """
        Aggregate values for merging.

        Args:
            values: a tensor of shape BCHW[D], representing the values of inference output.
            location: a tuple/list giving the top left location of the patch in the original image.

        Raises:
            NotImplementedError: When the subclass does not override this method.

        """
        if self.is_finalized:
            raise ValueError("`AvgMerger` is already finalized. Please instantiate a new object to aggregate.")
        patch_size = values.shape[2:]
        map_slice = tuple(slice(loc, loc + size) for loc, size in zip(location, patch_size))
        map_slice = ensure_tuple_size(map_slice, values.ndim, pad_val=slice(None), pad_from_start=True)
        self.values[map_slice] += values
        self.counts[map_slice] += 1

    def finalize(self) -> torch.Tensor:
        """
        Finalize merging by dividing values by counts and return the merged tensor.

        Notes:
            To avoid creating a new tensor for the final results (to save memory space),
            after this method is called, `get_values()` method will return the "final" averaged values,
            and not the accumulating values. Also calling `finalize()` multiple times does not have any effect.

        Returns:
            torch.tensor: a tensor of merged patches
        """
        # guard against multiple call to finalize
        if not self.is_finalized:
            # use in-place division to save space
            self.values.div_(self.counts)
            # finalize the shape
            self.values = self.values[tuple(slice(0, end) for end in self.cropped_shape)]
            # set finalize flag to protect performing in-place division again
            self.is_finalized = True

        return self.values

    def get_output(self) -> torch.Tensor:
        """
        Get the final merged output.

        Returns:
            torch.Tensor: merged output.
        """
        return self.finalize()

    def get_values(self) -> torch.Tensor:
        """
        Get the accumulated values during aggregation or final averaged values after it is finalized.

        Returns:
            torch.tensor: aggregated values.

        Notes:
            - If called before calling `finalize()`, this method returns the accumulating values.
            - If called after calling `finalize()`, this method returns the final merged [and averaged] values.
        """
        return self.values

    def get_counts(self) -> torch.Tensor:
        """
        Get the aggregator tensor for number of samples.

        Returns:
            torch.Tensor: number of accumulated samples at each location.
        """
        return self.counts


@require_pkg(pkg_name="zarr")
class ZarrAvgMerger(Merger):
    """Merge patches by taking average of the overlapping area and store the results in zarr array.

    Zarr is a format for the storage of chunked, compressed, N-dimensional arrays.
    Zarr data can be stored in any storage system that can be represented as a key-value store,
    like POSIX file systems, cloud object storage, zip files, and relational and document databases.
    See https://zarr.readthedocs.io/en/stable/ for more details.
    It is particularly useful for storing N-dimensional arrays too large to fit into memory.
    One specific use case of this class is to merge patches extracted from whole slide images (WSI),
    where the merged results do not fit into memory and need to be stored on a file system.

    Args:
        merged_shape: the shape of the tensor required to merge the patches.
        cropped_shape: the shape of the final merged output tensor.
            If not provided, it will be the same as `merged_shape`.
        dtype: the dtype for the final merged result. Default is `float32`.
        value_dtype: the dtype for value aggregating tensor and the final result. Default is `float32`.
        count_dtype: the dtype for sample counting tensor. Default is `uint8`.
        store: the zarr store to save the final results. Default is "merged.zarr".
        value_store: the zarr store to save the value aggregating tensor. Default is a temporary store.
        count_store: the zarr store to save the sample counting tensor. Default is a temporary store.
        compressor: the compressor for final merged zarr array. Default is None.
            Deprecated since 1.5.0 and will be removed in 1.7.0. Use codecs instead.
        value_compressor: the compressor for value aggregating zarr array. Default is None.
            Deprecated since 1.5.0 and will be removed in 1.7.0. Use value_codecs instead.
        count_compressor: the compressor for sample counting zarr array. Default is None.
            Deprecated since 1.5.0 and will be removed in 1.7.0. Use count_codecs instead.
        codecs: the codecs for final merged zarr array. Default is None.
            For zarr v3, this is a list of codec configurations. See zarr documentation for details.
        value_codecs: the codecs for value aggregating zarr array. Default is None.
            For zarr v3, this is a list of codec configurations. See zarr documentation for details.
        count_codecs: the codecs for sample counting zarr array. Default is None.
            For zarr v3, this is a list of codec configurations. See zarr documentation for details.
        chunks : int or tuple of ints that defines the chunk shape, or boolean. Default is True.
            If True, chunk shape will be guessed from `shape` and `dtype`.
            If False, it will be set to `shape`, i.e., single chunk for the whole array.
            If an int, the chunk size in each dimension will be given by the value of `chunks`.
    """

    @deprecated_arg(
        name="compressor", since="1.5.0", removed="1.7.0", new_name="codecs", msg_suffix="Please use 'codecs' instead."
    )
    @deprecated_arg(
        name="value_compressor",
        since="1.5.0",
        removed="1.7.0",
        new_name="value_codecs",
        msg_suffix="Please use 'value_codecs' instead.",
    )
    @deprecated_arg(
        name="count_compressor",
        since="1.5.0",
        removed="1.7.0",
        new_name="count_codecs",
        msg_suffix="Please use 'count_codecs' instead.",
    )
    def __init__(
        self,
        merged_shape: Sequence[int],
        cropped_shape: Sequence[int] | None = None,
        dtype: np.dtype | str = "float32",
        value_dtype: np.dtype | str = "float32",
        count_dtype: np.dtype | str = "uint8",
        store: zarr.storage.Store | str = "merged.zarr",  # type: ignore
        value_store: zarr.storage.Store | str | None = None,  # type: ignore
        count_store: zarr.storage.Store | str | None = None,  # type: ignore
        compressor: str | None = None,
        value_compressor: str | None = None,
        count_compressor: str | None = None,
        codecs: list | None = None,
        value_codecs: list | None = None,
        count_codecs: list | None = None,
        chunks: Sequence[int] | bool = True,
        thread_locking: bool = True,
    ) -> None:
        super().__init__(merged_shape=merged_shape, cropped_shape=cropped_shape)
        if not self.merged_shape:
            raise ValueError(f"`merged_shape` must be provided for `ZarrAvgMerger`. {self.merged_shape} is give.")
        self.output_dtype = dtype
        self.value_dtype = value_dtype
        self.count_dtype = count_dtype
        self.store = store
        # Use separate tmpdir attributes so both TemporaryDirectory objects remain alive
        # for the full lifetime of this instance. Previously a single `self.tmpdir` was
        # overwritten when both value_store and count_store were None, causing the first
        # TemporaryDirectory to be garbage-collected immediately.
        self.value_tmpdir: TemporaryDirectory | None = None
        self.count_tmpdir: TemporaryDirectory | None = None

        # Handle zarr v3 vs older versions
        is_zarr_v3 = version_geq(get_package_version("zarr"), "3.0.0")

        if is_zarr_v3:
            if value_store is None:
                self.value_tmpdir = TemporaryDirectory()
                self.value_store = zarr.storage.LocalStore(self.value_tmpdir.name)  # type: ignore
            else:
                self.value_store = value_store  # type: ignore
            if count_store is None:
                self.count_tmpdir = TemporaryDirectory()
                self.count_store = zarr.storage.LocalStore(self.count_tmpdir.name)  # type: ignore
            else:
                self.count_store = count_store  # type: ignore
        else:
            self.value_store = zarr.storage.TempStore() if value_store is None else value_store  # type: ignore
            self.count_store = zarr.storage.TempStore() if count_store is None else count_store  # type: ignore

        self.chunks = chunks

        # Initialize codecs/compressor attributes with proper types
        self.codecs: list | None = None
        self.value_codecs: list | None = None
        self.count_codecs: list | None = None

        if is_zarr_v3:
            # For zarr v3, use codecs or convert compressor to codecs
            if codecs is not None:
                self.codecs = codecs
            elif compressor is not None:
                self.codecs = compressor if isinstance(compressor, (list, tuple)) else [compressor]
            else:
                self.codecs = None

            if value_codecs is not None:
                self.value_codecs = value_codecs
            elif value_compressor is not None:
                self.value_codecs = value_compressor if isinstance(value_compressor, (list, tuple)) else [value_compressor]
            else:
                self.value_codecs = None

            if count_codecs is not None:
                self.count_codecs = count_codecs
            elif count_compressor is not None:
                self.count_codecs = count_compressor if isinstance(count_compressor, (list, tuple)) else [count_compressor]
            else:
                self.count_codecs = None
        else:
            # For zarr v2, use compressors
            self.codecs = codecs[0] if isinstance(codecs, (list, tuple)) else codecs if codecs is not None else compressor  # type: ignore[assignment]
            self.value_codecs = value_codecs[0] if isinstance(value_codecs, (list, tuple)) else value_codecs if value_codecs is not None else value_compressor  # type: ignore[assignment]
            self.count_codecs = count_codecs[0] if isinstance(count_codecs, (list, tuple)) else count_codecs if count_codecs is not None else count_compressor  # type: ignore[assignment]

        # Create zarr arrays with appropriate parameters based on version
        if is_zarr_v3:
            self.output = zarr.empty(
                shape=self.merged_shape,
                chunks=self.chunks,
                dtype=self.output_dtype,
                codecs=self.codecs,
                store=self.store,
                overwrite=True,
            )
            self.values = zarr.zeros(
                shape=self.merged_shape,
                chunks=self.chunks,
                dtype=self.value_dtype,
                codecs=self.value_codecs,
                store=self.value_store,
                overwrite=True,
            )
            self.counts = zarr.zeros(
                shape=self.merged_shape,
                chunks=self.chunks,
                dtype=self.count_dtype,
                codecs=self.count_codecs,
                store=self.count_store,
                overwrite=True,
            )
        else:
            self.output = zarr.empty(
                shape=self.merged_shape,
                chunks=self.chunks,
                dtype=self.output_dtype,
                compressor=self.codecs,
                store=self.store,
                overwrite=True,
            )
            self.values = zarr.zeros(
                shape=self.merged_shape,
                chunks=self.chunks,
                dtype=self.value_dtype,
                compressor=self.value_codecs,
                store=self.value_store,
                overwrite=True,
            )
            self.counts = zarr.zeros(
                shape=self.merged_shape,
                chunks=self.chunks,
                dtype=self.count_dtype,
                compressor=self.count_codecs,
                store=self.count_store,
                overwrite=True,
            )

        self.lock: threading.Lock | nullcontext
        if thread_locking:
            # use lock to protect the in-place addition during aggregation
            self.lock = threading.Lock()
        else:
            # use nullcontext to avoid the locking if not needed
            self.lock = nullcontext()

    def aggregate(self, values: torch.Tensor, location: Sequence[int]) -> None:
        """
        Aggregate values for merging.

        Args:
            values: a tensor of shape BCHW[D], representing the values of inference output.
            location: a tuple/list giving the top left location of the patch in the original image.
        """
        if self.is_finalized:
            raise ValueError("`ZarrAvgMerger` is already finalized. Please instantiate a new object to aggregate.")
        patch_size = values.shape[2:]
        map_slice = tuple(slice(loc, loc + size) for loc, size in zip(location, patch_size))
        map_slice = ensure_tuple_size(map_slice, values.ndim, pad_val=slice(None), pad_from_start=True)
        with self.lock:
            self.values[map_slice] += values.numpy()
            self.counts[map_slice] += 1  # type: ignore[operator]

    def finalize(self) -> zarr.Array:
        """
        Finalize merging by dividing values by counts and return the merged tensor.

        Notes:
            To avoid creating a new tensor for the final results (to save memory space),
            after this method is called, `get_values()` method will return the "final" averaged values,
            and not the accumulating values. Also calling `finalize()` multiple times does not have any effect.

        Returns:
            zarr.Array: a zarr array of of merged patches
        """
        # guard against multiple calls to finalize
        if not self.is_finalized:
            # use chunks for division to fit into memory
            for chunk in iterate_over_chunks(self.values.chunks, self.values.cdata_shape):
                self.output[chunk] = self.values[chunk] / self.counts[chunk]  # type: ignore[operator]
            # finalize the shape
            self.output.resize(self.cropped_shape)
            # set finalize flag to protect performing in-place division again
            self.is_finalized = True

        return self.output

    def get_output(self) -> zarr.Array:
        """
        Get the final merged output.

        Returns:
            zarr.Array: Merged (averaged) output tensor.
        """
        return self.output

    def get_values(self) -> zarr.Array:
        """
        Get the accumulated values during aggregation

        Returns:
            zarr.Array: aggregated values.

        """
        return self.values

    def get_counts(self) -> zarr.Array:
        """
        Get the aggregator tensor for number of samples.

        Returns:
            zarr.Array: Number of accumulated samples at each location.
        """
        return self.counts


def iterate_over_chunks(chunks, cdata_shape, slice_tuple=()):
    """
    Iterate over chunks of a given shape.

    Args:
        chunks: the chunk shape
        cdata_shape: the shape of the data in chunks
        slice_tuple: the slice tuple to be used for indexing

    Raises:
        ValueError: When the length of chunks and cdata_shape are not the same.

    Yields:
        slices of the data
    """
    if len(chunks) != len(cdata_shape):
        raise ValueError("chunks and cdata_shape must have the same length")
    if len(chunks) == 1:
        for i in range(cdata_shape[0]):
            yield slice_tuple + (slice(i * chunks[0], (i + 1) * chunks[0]),)
    else:
        for i in range(cdata_shape[0]):
            yield from iterate_over_chunks(
                chunks[1:], cdata_shape[1:], slice_tuple + (slice(i * chunks[0], (i + 1) * chunks[0]),)
            )