From da081400dcaa217a0f7343243eaa915700e2f9a9 Mon Sep 17 00:00:00 2001 From: Michael Anstett Date: Wed, 7 May 2025 10:23:48 -0400 Subject: [PATCH 1/7] Add platform detection for AWS and Mac --- src/swell/deployment/platforms/platforms.py | 45 +++++++++++++-------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/src/swell/deployment/platforms/platforms.py b/src/swell/deployment/platforms/platforms.py index 3d7df432f..88115f560 100644 --- a/src/swell/deployment/platforms/platforms.py +++ b/src/swell/deployment/platforms/platforms.py @@ -13,6 +13,7 @@ import yaml from enum import Enum import subprocess +import platform as pltfrm from importlib import resources @@ -91,32 +92,44 @@ class SwellPlatform(Enum): ''' Store filepaths for platform defaults. ''' NCCS_DISCOVER_SLES15 = os.path.join(platform_path(), 'nccs_discover_sles15') NCCS_DISCOVER_CASCADE = os.path.join(platform_path(), 'nccs_discover') + AWS = os.path.join(platform_path(), 'aws') + MAC = os.path.join(platform_path(), 'mac') GENERIC = os.path.join(platform_path(), 'generic') @classmethod def detect_platform(cls): - ''' Detect the current platform, or return generic (NCCS only). ''' + ''' Detect the current platform, or return generic. ''' # Try to get the hostname hostname = os.environ.get('HOSTNAME') - if hostname is None or not any(key in hostname for key in ['discover', 'borg', 'warp']): - return cls.GENERIC + os_name = pltfrm.platform() - # Try the lscpu shell command, which should be available across NCCS - try: - cpu_info = str(subprocess.run('lscpu', capture_output=True).stdout) + if hostname is not None: - model_name = cpu_info.split('Model name:')[1].strip().split('\n')[0].strip() + # Check for Discover hostnames + if any(key in hostname for key in ['discover', 'borg', 'warp']): - # Match the cpu to the expected platform - if all(key in model_name for key in ['Intel', 'Xeon']): - return cls.NCCS_DISCOVER_CASCADE - elif all(key in model_name for key in ['AMD', 'EPYC']): - return cls.NCCS_DISCOVER_SLES15 - else: - return cls.GENERIC + try: + # Try the lscpu shell command, which should be available across NCCS + cpu_info = str(subprocess.run('lscpu', capture_output=True).stdout) - except (FileNotFoundError, IndexError): - return cls.GENERIC + model_name = cpu_info.split('Model name:')[1].strip().split('\n')[0].strip() + + # Match the cpu to the expected platform + if all(key in model_name for key in ['Intel', 'Xeon']): + return cls.NCCS_DISCOVER_CASCADE + elif all(key in model_name for key in ['AMD', 'EPYC']): + return cls.NCCS_DISCOVER_SLES15 + + except (FileNotFoundError, IndexError): + return cls.GENERIC + + # Check for AWS + if all(key in os_name for key in ['Linux', 'aws']): + return cls.AWS + + # Check for Mac + if all(key in os_name for key in ['macOS', 'arm64']): + return cls.MAC # -------------------------------------------------------------------------------------------------- From 4a95a8c2d11d651eb3a66572ae6b6a5d5530db54 Mon Sep 17 00:00:00 2001 From: Michael Anstett Date: Wed, 7 May 2025 10:59:34 -0400 Subject: [PATCH 2/7] Automatically set platform based on host --- src/swell/cylc_swell.py | 4 +- src/swell/deployment/platforms/platforms.py | 45 ++++++++++----------- src/swell/swell.py | 17 ++++---- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/src/swell/cylc_swell.py b/src/swell/cylc_swell.py index f07c5d6b7..6470c4400 100644 --- a/src/swell/cylc_swell.py +++ b/src/swell/cylc_swell.py @@ -12,7 +12,7 @@ import os import sys -from swell.deployment.platforms.platforms import SwellPlatform +from swell.deployment.platforms.platforms import SwellPlatforms from swell.utilities.logger import Logger # -------------------------------------------------------------------------------------------------- @@ -58,7 +58,7 @@ def execute_cylc(argv=sys.argv) -> None: platform = SwellPlatform.detect_platform() # Location for Discover cylc installation - if platform in [SwellPlatform.NCCS_DISCOVER_CASCADE, SwellPlatform.NCCS_DISCOVER_SLES15]: + if platform in [SwellPlatforms.NCCS_DISCOVER_CASCADE, SwellPlatforms.NCCS_DISCOVER_SLES15]: opt = '/discover/nobackup/projects/gmao/advda/swell/dev/core/cylc/sles15_8.4.0/' python_ver = 'python3.11' diff --git a/src/swell/deployment/platforms/platforms.py b/src/swell/deployment/platforms/platforms.py index 88115f560..afaaf49e3 100644 --- a/src/swell/deployment/platforms/platforms.py +++ b/src/swell/deployment/platforms/platforms.py @@ -14,6 +14,7 @@ from enum import Enum import subprocess import platform as pltfrm +from typing import Self from importlib import resources @@ -31,22 +32,6 @@ def platform_path() -> str: # -------------------------------------------------------------------------------------------------- -def get_platforms() -> list: - - # Get list of supported platforms - platforms = [dir for dir in os.listdir(platform_path()) - if os.path.isdir(os.path.join(platform_path(), dir))] - - # If anything in platforms contains '__' remove it from platforms list - platforms = [platform for platform in platforms if '__' not in platform] - - # List all directories in directory - return platforms - - -# -------------------------------------------------------------------------------------------------- - - def login_or_compute(platform) -> str: ''' @@ -88,13 +73,13 @@ def login_or_compute(platform) -> str: # -------------------------------------------------------------------------------------------------- -class SwellPlatform(Enum): - ''' Store filepaths for platform defaults. ''' - NCCS_DISCOVER_SLES15 = os.path.join(platform_path(), 'nccs_discover_sles15') - NCCS_DISCOVER_CASCADE = os.path.join(platform_path(), 'nccs_discover') - AWS = os.path.join(platform_path(), 'aws') - MAC = os.path.join(platform_path(), 'mac') - GENERIC = os.path.join(platform_path(), 'generic') +class SwellPlatforms(Enum): + ''' Track platforms supported by Swell. ''' + NCCS_DISCOVER_SLES15 = 'nccs_discover_sles15' + NCCS_DISCOVER_CASCADE = 'nccs_discover_cascade' + AWS = 'aws' + MAC = 'mac' + GENERIC = 'generic' @classmethod def detect_platform(cls): @@ -132,4 +117,18 @@ def detect_platform(cls): if all(key in os_name for key in ['macOS', 'arm64']): return cls.MAC + # -------------------------------------------------------------------------------------------------- + + @classmethod + def get_all(cls) -> list: + return [item.value for item in cls] + + # -------------------------------------------------------------------------------------------------- + + @classmethod + def match_name(cls, name: str) -> Self: + # Return the enum instance based on the name + return getattr(cls, name.upper()) + + # -------------------------------------------------------------------------------------------------- diff --git a/src/swell/swell.py b/src/swell/swell.py index c4b9105a2..2b3f18ddc 100644 --- a/src/swell/swell.py +++ b/src/swell/swell.py @@ -11,7 +11,7 @@ import click from typing import Union, Optional, Literal -from swell.deployment.platforms.platforms import get_platforms +from swell.deployment.platforms.platforms import SwellPlatforms as platforms from swell.deployment.create_experiment import clone_config, create_experiment_directory from swell.deployment.launch_experiment import launch_experiment from swell.tasks.base.task_base import task_wrapper, get_tasks @@ -55,7 +55,7 @@ def swell_driver() -> None: platform_help = 'If using defaults for input_method, this option is used to determine which ' + \ 'platform to use for platform specific defaults. Options are ' + \ - str(get_platforms()) + str(platforms.get_all()) override_help = 'After generating the config file, parameters inside can be overridden ' + \ 'using values from the override config file.' @@ -90,8 +90,8 @@ def swell_driver() -> None: @click.argument('suite', type=click.Choice(AllSuites.config_names())) @click.option('-m', '--input_method', 'input_method', default='defaults', type=click.Choice(['defaults', 'cli']), help=input_method_help) -@click.option('-p', '--platform', 'platform', default='nccs_discover_sles15', - type=click.Choice(get_platforms()), help=platform_help) +@click.option('-p', '--platform', 'platform', default=platforms.detect_platform().value, + type=click.Choice(platforms.get_all()), help=platform_help) @click.option('-o', '--override', 'override', default=None, help=override_help) @click.option('-a', '--advanced', 'advanced', default=False, help=advanced_help) @click.option('-s', '--slurm', 'slurm', default=None, help=slurm_help) @@ -112,6 +112,7 @@ def create( suite (str): Name of the suite you wish to run. \n """ + # Create the experiment directory create_experiment_directory(suite, input_method, platform, override, advanced, slurm) @@ -244,8 +245,8 @@ def test(test: str) -> None: @swell_driver.command() -@click.option('-p', '--platform', 'platform', type=click.Choice(get_platforms()), - default="nccs_discover_sles15", help=platform_help) +@click.option('-p', '--platform', 'platform', type=click.Choice(platforms.get_all()), + default=platforms.detect_platform().value, help=platform_help) @click.argument('suite', type=click.Choice(("hofx", "3dvar", "ufo_testing"))) def t1test( suite: Literal["hofx", "3dvar", "ufo_testing"], @@ -264,8 +265,8 @@ def t1test( @swell_driver.command() -@click.option('-p', '--platform', 'platform', type=click.Choice(get_platforms()), - default="nccs_discover_sles15", help=platform_help) +@click.option('-p', '--platform', 'platform', type=click.Choice(platforms.get_all()), + default=platforms.detect_platform().value, help=platform_help) @click.argument('suite', type=click.Choice(("hofx", "3dvar", "ufo_testing", "convert_ncdiags", "3dfgat_atmos", "build_jedi"))) def t2test( From 48ac7c75a1b205a427cdbba442ccb43edabacce0 Mon Sep 17 00:00:00 2001 From: Michael Anstett Date: Wed, 7 May 2025 11:25:42 -0400 Subject: [PATCH 3/7] Fix for CI tests --- src/swell/deployment/platforms/platforms.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/swell/deployment/platforms/platforms.py b/src/swell/deployment/platforms/platforms.py index afaaf49e3..fbe9c6818 100644 --- a/src/swell/deployment/platforms/platforms.py +++ b/src/swell/deployment/platforms/platforms.py @@ -117,6 +117,8 @@ def detect_platform(cls): if all(key in os_name for key in ['macOS', 'arm64']): return cls.MAC + return cls.GENERIC + # -------------------------------------------------------------------------------------------------- @classmethod From d992c1c3981f0fc4800310bd89ce82d4614c85f2 Mon Sep 17 00:00:00 2001 From: Michael Anstett Date: Thu, 8 May 2025 16:32:38 -0400 Subject: [PATCH 4/7] bugfix for cylc --- src/swell/cylc_swell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/swell/cylc_swell.py b/src/swell/cylc_swell.py index 6470c4400..f86479387 100644 --- a/src/swell/cylc_swell.py +++ b/src/swell/cylc_swell.py @@ -55,7 +55,7 @@ def execute_cylc(argv=sys.argv) -> None: logger = Logger('SwellCylcEntryPoint') - platform = SwellPlatform.detect_platform() + platform = SwellPlatforms.detect_platform() # Location for Discover cylc installation if platform in [SwellPlatforms.NCCS_DISCOVER_CASCADE, SwellPlatforms.NCCS_DISCOVER_SLES15]: From d338dd37ef1ab9d718acfa5b5d1a30f8023bd718 Mon Sep 17 00:00:00 2001 From: Michael Anstett Date: Thu, 8 May 2025 17:20:22 -0400 Subject: [PATCH 5/7] Raise errors for unsupported platforms --- src/swell/deployment/platforms/platforms.py | 26 ++++++++++----------- src/swell/swell.py | 19 +++++++++++---- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/swell/deployment/platforms/platforms.py b/src/swell/deployment/platforms/platforms.py index fbe9c6818..0a87050f9 100644 --- a/src/swell/deployment/platforms/platforms.py +++ b/src/swell/deployment/platforms/platforms.py @@ -78,8 +78,6 @@ class SwellPlatforms(Enum): NCCS_DISCOVER_SLES15 = 'nccs_discover_sles15' NCCS_DISCOVER_CASCADE = 'nccs_discover_cascade' AWS = 'aws' - MAC = 'mac' - GENERIC = 'generic' @classmethod def detect_platform(cls): @@ -100,24 +98,24 @@ def detect_platform(cls): model_name = cpu_info.split('Model name:')[1].strip().split('\n')[0].strip() - # Match the cpu to the expected platform - if all(key in model_name for key in ['Intel', 'Xeon']): - return cls.NCCS_DISCOVER_CASCADE - elif all(key in model_name for key in ['AMD', 'EPYC']): - return cls.NCCS_DISCOVER_SLES15 - except (FileNotFoundError, IndexError): - return cls.GENERIC + raise ValueError('NCCS Discover hostname detected, but failed to ' + 'automatically detect cpu type with "lscpu".') + + # Match the cpu to the expected platform + if all(key in model_name for key in ['Intel', 'Xeon']): + return cls.NCCS_DISCOVER_CASCADE + elif all(key in model_name for key in ['AMD', 'EPYC']): + return cls.NCCS_DISCOVER_SLES15 + else: + raise ValueError(f'NCCS Discover hostname detected, but CPU model ' + f'{model_name} does not match any known node types') # Check for AWS if all(key in os_name for key in ['Linux', 'aws']): return cls.AWS - # Check for Mac - if all(key in os_name for key in ['macOS', 'arm64']): - return cls.MAC - - return cls.GENERIC + raise ValueError(f'Unknown or unsupported platform: {os_name}.') # -------------------------------------------------------------------------------------------------- diff --git a/src/swell/swell.py b/src/swell/swell.py index 2b3f18ddc..d62191254 100644 --- a/src/swell/swell.py +++ b/src/swell/swell.py @@ -90,7 +90,7 @@ def swell_driver() -> None: @click.argument('suite', type=click.Choice(AllSuites.config_names())) @click.option('-m', '--input_method', 'input_method', default='defaults', type=click.Choice(['defaults', 'cli']), help=input_method_help) -@click.option('-p', '--platform', 'platform', default=platforms.detect_platform().value, +@click.option('-p', '--platform', 'platform', default=None, type=click.Choice(platforms.get_all()), help=platform_help) @click.option('-o', '--override', 'override', default=None, help=override_help) @click.option('-a', '--advanced', 'advanced', default=False, help=advanced_help) @@ -113,6 +113,9 @@ def create( """ + if platform is None: + platform = platforms.detect_platform().value + # Create the experiment directory create_experiment_directory(suite, input_method, platform, override, advanced, slurm) @@ -246,11 +249,11 @@ def test(test: str) -> None: @swell_driver.command() @click.option('-p', '--platform', 'platform', type=click.Choice(platforms.get_all()), - default=platforms.detect_platform().value, help=platform_help) + default=None, help=platform_help) @click.argument('suite', type=click.Choice(("hofx", "3dvar", "ufo_testing"))) def t1test( suite: Literal["hofx", "3dvar", "ufo_testing"], - platform: Optional[str] = "nccs_discover_sles15" + platform: Optional[str] ) -> None: """ Run a particular swell suite from the tier 1 tests. @@ -258,6 +261,9 @@ def t1test( Arguments: suite (str): Name of the suite to run (e.g., hofx, 3dvar, ufo_testing) """ + if platform is None: + platform = platforms.detect_platform().value + run_suite(suite, platform, TestSuite.TIER1) @@ -266,13 +272,13 @@ def t1test( @swell_driver.command() @click.option('-p', '--platform', 'platform', type=click.Choice(platforms.get_all()), - default=platforms.detect_platform().value, help=platform_help) + default=None, help=platform_help) @click.argument('suite', type=click.Choice(("hofx", "3dvar", "ufo_testing", "convert_ncdiags", "3dfgat_atmos", "build_jedi"))) def t2test( suite: Literal["hofx", "3dvar", "ufo_testing", "convert_ncdiags", "3dfgat_atmos", "build_jedi"], - platform: Optional[str] = "nccs_discover_sles15" + platform: Optional[str] ) -> None: """ Run a particular swell suite from the tier 2 tests. @@ -280,6 +286,9 @@ def t2test( Arguments: suite (str): Name of the suite to run (e.g., hofx, 3dvar, ufo_testing) """ + if platform is None: + platform = platforms.detect_platform().value + run_suite(suite, platform, TestSuite.TIER2) From 072d785a8e361a5816059fb4f7775665608cb3b2 Mon Sep 17 00:00:00 2001 From: Michael Anstett Date: Thu, 8 May 2025 17:25:02 -0400 Subject: [PATCH 6/7] Add support for cylc on AWS --- src/swell/cylc_swell.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/swell/cylc_swell.py b/src/swell/cylc_swell.py index f86479387..0e00b29c2 100644 --- a/src/swell/cylc_swell.py +++ b/src/swell/cylc_swell.py @@ -70,6 +70,11 @@ def execute_cylc(argv=sys.argv) -> None: subprocess.run(cylc_command, env=env) + elif platform == SwellPlatforms.AWS: + cylc_command = ['/usr/local/bin/cylc'] + sys.argv[1:] + + subprocess.run(cylc_command, env=env) + # Try just calling cylc from the path else: logger.warning('Platform not recognized, attempting to call Cylc executable from the path.') From 692798a6c0ee52b22edce749e0105df67302fd66 Mon Sep 17 00:00:00 2001 From: Michael Anstett Date: Thu, 8 May 2025 17:28:12 -0400 Subject: [PATCH 7/7] fix detection --- src/swell/cylc_swell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/swell/cylc_swell.py b/src/swell/cylc_swell.py index 0e00b29c2..86f980bc2 100644 --- a/src/swell/cylc_swell.py +++ b/src/swell/cylc_swell.py @@ -73,7 +73,7 @@ def execute_cylc(argv=sys.argv) -> None: elif platform == SwellPlatforms.AWS: cylc_command = ['/usr/local/bin/cylc'] + sys.argv[1:] - subprocess.run(cylc_command, env=env) + subprocess.run(cylc_command) # Try just calling cylc from the path else: