diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 9340945f..19b50601 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -97,7 +97,7 @@ import weakref -_view_registry: weakref.WeakSet = weakref.WeakSet() +_view_registry: weakref.WeakValueDictionary = weakref.WeakValueDictionary() def cleanup(): @@ -107,7 +107,7 @@ def cleanup(): Also cleanup all View objects before finalization """ - for view in list(_view_registry): + for view in list(_view_registry.values()): try: if hasattr(view, "array"): view.array = None diff --git a/pykokkos/core/cpp_setup.py b/pykokkos/core/cpp_setup.py index 350f607b..935648b9 100644 --- a/pykokkos/core/cpp_setup.py +++ b/pykokkos/core/cpp_setup.py @@ -4,6 +4,7 @@ import shutil import subprocess import sys +import sysconfig from types import ModuleType from typing import List, Tuple @@ -193,23 +194,40 @@ def generate_cmake( :returns: tuple of (cmake_args, module_name) """ - view_space: str = "Kokkos::HostSpace" + # Handle the case where the requested execution space is host but the + # default space is not (the view memory space then follows the default space) + default_space: ExecutionSpace = km.get_default_space() + + # Determine view memory space and layout + view_space: str + view_layout: str + if space is ExecutionSpace.Cuda: - if enable_uvm: - view_space = "Kokkos::CudaUVMSpace" - if space is ExecutionSpace.HIP: - if enable_uvm: - view_space = "Kokkos::Experimental::HIPManagedSpace" - - space_value: str - if space.value == "HIP": - space_value = "Experimental::HIP" + view_space = "Kokkos::CudaUVMSpace" if enable_uvm else "Kokkos::CudaSpace" + view_layout = "Kokkos::LayoutLeft" + elif space is ExecutionSpace.HIP: + view_space = ( + "Kokkos::Experimental::HIPManagedSpace" + if enable_uvm + else "Kokkos::Experimental::HIPSpace" + ) + view_layout = "Kokkos::LayoutLeft" + elif is_host_execution_space(space): + if not is_host_execution_space(default_space): + # Host policy with GPU default: use caller's view
memory/layout + mem_space = get_default_memory_space(default_space) + prefix = "Kokkos::Experimental" if "HIP" in mem_space.name else "Kokkos" + view_space = f"{prefix}::{mem_space.name}" + view_layout = "Kokkos::LayoutLeft" + else: + # Host policy with host default + view_space = "Kokkos::HostSpace" + view_layout = "Kokkos::LayoutRight" else: - space_value = space.value + view_space = "Kokkos::HostSpace" + view_layout = "Kokkos::LayoutRight" - view_layout: str = str(get_default_layout(get_default_memory_space(space))) - view_layout = view_layout.split(".")[-1] - view_layout = f"Kokkos::{view_layout}" + space_value: str = "Experimental::HIP" if space.value == "HIP" else space.value precision: str = km.get_default_precision().__name__.split(".")[-1] lib_path: Path @@ -499,4 +517,7 @@ def is_compiled(output_dir: Path) -> bool: :returns: true if compiled """ - return output_dir.is_dir() + if not output_dir.is_dir(): + return False + ext: str = sysconfig.get_config_var("EXT_SUFFIX") or ".so" + return (output_dir / f"kernel{ext}").is_file() diff --git a/pykokkos/core/keywords.py b/pykokkos/core/keywords.py index d2d52181..1ff69564 100644 --- a/pykokkos/core/keywords.py +++ b/pykokkos/core/keywords.py @@ -14,6 +14,7 @@ class Keywords(Enum): ThreadsBegin = "pk_threads_begin" ThreadsEnd = "pk_threads_end" ArgMemSpace = "pk_arg_memspace" + ArgLayout = "pk_arg_layout" DefaultExecSpace = "pk_exec_space" DefaultExecSpaceInstance = "pk_exec_space_instance" KernelName = "pk_kernel_name" diff --git a/pykokkos/core/module_setup.py b/pykokkos/core/module_setup.py index b05aabb5..38736450 100644 --- a/pykokkos/core/module_setup.py +++ b/pykokkos/core/module_setup.py @@ -7,7 +7,7 @@ import sysconfig from typing import Callable, List, Optional, Set, Union -from pykokkos.interface import ExecutionSpace +from pykokkos.interface import ExecutionSpace, is_host_execution_space import pykokkos.kokkos_manager as km from .cpp_setup import CppSetup @@ -181,7 +181,15 @@ def get_output_dir( if 
restrict_signature is not None: out_dir = out_dir / f"restrict_{restrict_signature}" - out_dir = out_dir / space.value + # Serial + default Cuda uses different PK_ARG_* than Serial + default + # OpenMP, so we need to separate JIT dirs (Serial_caller_Cuda) + dir_name: str = space.value + if is_host_execution_space(space): + default_space: ExecutionSpace = km.get_default_space() + if not is_host_execution_space(default_space): + dir_name = f"{dir_name}_caller_{default_space.value}" + + out_dir = out_dir / dir_name return out_dir diff --git a/pykokkos/core/translators/bindings.py b/pykokkos/core/translators/bindings.py index bff7b3b5..9faad1bb 100644 --- a/pykokkos/core/translators/bindings.py +++ b/pykokkos/core/translators/bindings.py @@ -83,7 +83,13 @@ def get_kernel_params( continue space: str = get_view_memory_space(t, "bindings") - layout: str = f"{Keywords.DefaultExecSpace.value}::array_layout" + # Use PK_ARG_LAYOUT when space is pk_arg_memspace. Serial/OpenMP policy + # uses pk_exec_space::array_layout (LayoutRight) but default CUDA views + # are LayoutLeft + if space == Keywords.ArgMemSpace.value: + layout: str = Keywords.ArgLayout.value + else: + layout = f"{Keywords.DefaultExecSpace.value}::array_layout" params[n.declname] = cpp_view_type(t, space=space, layout=layout, real=real) params[Keywords.DefaultExecSpaceInstance.value] = Keywords.DefaultExecSpace.value @@ -141,6 +147,39 @@ def get_device_views(members: PyKokkosMembers) -> Dict[str, str]: } +def _generate_mirror_with_exec_layout( + src: str, + dst: str, + view_type: cppast.ClassType, + real: Optional[str], +) -> str: + """ + Generate code to create a mirror view with execution space layout from + the source view. 
+ + :param src: the name of the source view + :param dst: the name of the destination view + :param view_type: the cppast representation of the view type + :param real: optionally provide the precision of pk.real dtypes + :returns: the generated C++ code + """ + exec_space: str = Keywords.DefaultExecSpace.value + dst_type: str = cpp_view_type( + view_type, + space=f"{exec_space}::memory_space", + layout=f"{exec_space}::array_layout", + real=real, + ) + rank: int = int(re.search(r"\d+", view_type.typename).group()) + extents: str = ",".join(f"{src}.extent({i})" for i in range(rank)) + code: str = ( + f'{dst_type} {dst}(' + f'Kokkos::view_alloc("{dst}", Kokkos::WithoutInitializing), {extents});' + f"Kokkos::deep_copy({dst}, {src});" + ) + return code + + def generate_functor_instance( functor: str, members: PyKokkosMembers, @@ -182,11 +221,13 @@ def generate_functor_instance( get_view_memory_space(view_type, "bindings") == Keywords.ArgMemSpace.value ): - mirror_views += f"auto {d_v} = Kokkos::create_mirror_view_and_copy({exec_space_instance}, {v});" + mirror_views += _generate_mirror_with_exec_layout(v, d_v, view_type, None) else: mirror_views += f"auto {d_v} = {v};" else: - mirror_views += f"auto {d_v} = Kokkos::create_mirror_view_and_copy({exec_space_instance}, {v});" + mirror_views += _generate_mirror_with_exec_layout( + v, d_v, members.views[cppast.DeclRefExpr(v)], None + ) # Kokkos fails to compile a functor if there are no parameters in its constructor if len(args) == 0: diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index e1f4afec..61d71f0f 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -310,7 +310,7 @@ def __init__( try: from pykokkos import _view_registry - _view_registry.add(self) + _view_registry[id(self)] = self except (ImportError, AttributeError): pass