Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
24efa26
Add S3 credentials and download helpers (#783)
ftgoktas May 15, 2026
61f21e0
Merge branch 'develop' into feature/fgoktas/s3-obs-ingest
ftgoktas May 15, 2026
9e459de
Fix S3 session cookies (#783)
ftgoktas May 15, 2026
d611e0c
Update yaml
ftgoktas May 15, 2026
f9697cd
Use authenticated session in HTTPS path
ftgoktas May 15, 2026
1610032
Try cmr method
ftgoktas May 15, 2026
8ecbe42
Update config for tempo
ftgoktas May 15, 2026
b8a95e1
Make NO2 uppercase
ftgoktas May 15, 2026
e033bf7
Fix abort when not in dry run
ftgoktas May 15, 2026
4fc7c10
Change default dates
ftgoktas May 15, 2026
cf43c31
Handle corrupt files
ftgoktas May 15, 2026
943a1cb
fix name
ftgoktas May 21, 2026
76f2a22
Remove dead code
ftgoktas May 21, 2026
f219411
Remove boto3
ftgoktas May 21, 2026
71a31fb
Add documentation (#783)
ftgoktas May 22, 2026
0511f9b
Fix pycode (#783)
ftgoktas May 22, 2026
c389bb1
Create StoreJdi class
ftgoktas Jun 2, 2026
76e28fb
Add experiment id
ftgoktas Jun 2, 2026
f74a266
Add cycle time
ftgoktas Jun 2, 2026
93d4042
Create a task file
ftgoktas Jun 2, 2026
d724c8c
Fix the path template
ftgoktas Jun 2, 2026
0a2c74c
Add dry_run and fix datetime in StoreJdi
ftgoktas Jun 2, 2026
828f439
Fix dry run
ftgoktas Jun 2, 2026
371b1a9
Fix PermissionError for symlink
ftgoktas Jun 2, 2026
4ff9640
Move StoreJdi class
ftgoktas Jun 3, 2026
6e5ee3c
Merge branch 'develop' into feature/fgoktas/ingest-geos-cf-jdi
ftgoktas Jun 3, 2026
fbc4079
Set dry run True
ftgoktas Jun 3, 2026
34731c9
Use strftime
ftgoktas Jun 4, 2026
96601a2
Rename files and docs related to r2d2_ingest
ftgoktas Jun 4, 2026
191e0f7
Renaming to r2d2_ingest
ftgoktas Jun 4, 2026
8c5e5ab
Change to generic names
ftgoktas Jun 4, 2026
b132413
Switch to save_background naming
ftgoktas Jun 4, 2026
5a799b4
Acquire model component
ftgoktas Jun 4, 2026
2573689
Add store_as_symlink to experiment yaml
ftgoktas Jun 4, 2026
8f8afc7
Get model name with a function
ftgoktas Jun 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/_sidebar.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@
- [3DFGAT_marine_cycle]((examples/soca/3dfgat_marine_cycle.md))
- **R2D2 - Storing Data**
- [Understanding R2D2](examples/r2d2_intro.md)
- [Storing Observations to R2D2](examples/r2d2/ingest_obs.md)
- [Storing Observations to R2D2](examples/r2d2/r2d2_ingest.md)

- Configuration files in swell

- [Observation configuration](configs/observation_configuration.md)
- [R2D2 v3 credentials](configs/r2d2_v3_credentials.md)
- [SLURM configuration](configs/slurm_configuration.md)
- Model configuration:
- [Storing Observations to R2D2](examples/r2d2/ingest_obs.md)
- [Storing Observations to R2D2](examples/r2d2/r2d2_ingest.md)
- **SWELL Generic Suites**
- [Comparing two SWELL experiment outputs](examples/comparison_workflows.md)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Use this when IODA-formatted files are already available on the local filesystem

### Step 1: Add your observation to the suite config

Edit `src/swell/suites/ingest_obs/suite_config.py` and update (or create) the appropriate
Edit `src/swell/suites/r2d2_ingest/suite_config.py` and update (or create) the appropriate
section. For example for marine observations, update `ingest_obs_marine`:

```python
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/r2d2_intro.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
3. [R2D2 Concepts](#r2d2-concepts)
4. [How Swell Uses R2D2](#how-swell-uses-r2d2)
5. [Store & Fetch Quick Reference](#store--fetch-quick-reference)
6. [Storing Observations to R2D2](../examples/r2d2/ingest_obs.md)
6. [Storing Observations to R2D2](../examples/r2d2/r2d2_ingest.md)

---

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
DownloadObs-{{model_component}} => ConvertObsToIoda-{{model_component}}
BuildJediByLinking[^]? | BuildJedi[^] => ConvertObsToIoda-{{model_component}}
ConvertObsToIoda-{{model_component}} => IngestObs-{{model_component}}
{% elif ingest_background_pipeline %}
SaveBackground-{{model_component}}
{% else %}
IngestObs-{{model_component}}
{% endif %}
Expand Down Expand Up @@ -99,4 +101,10 @@
script = "swell task IngestObs $config -d $datetime -m {{model_component}}"
execution time limit = PT10M

{% if ingest_background_pipeline %}
[[SaveBackground-{{model_component}}]]
script = "swell task SaveBackground $config -d $datetime -m {{model_component}}"
execution time limit = PT30M
{% endif %}

{% endfor %}
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@

class SuiteConfig(QuestionContainer, Enum):

ingest_obs = QuestionList(
list_name="ingest_obs",
r2d2_ingest = QuestionList(
list_name="r2d2_ingest",
questions=[
sq.common,
qd.download_convert_pipeline(False)
Expand All @@ -25,7 +25,7 @@ class SuiteConfig(QuestionContainer, Enum):
ingest_obs_marine = QuestionList(
list_name="ingest_obs_marine",
questions=[
ingest_obs,
r2d2_ingest,
sq.marine,
qd.start_cycle_point("2023-07-02T06:00:00Z"),
qd.final_cycle_point("2023-07-03T06:00:00Z"),
Expand All @@ -43,7 +43,7 @@ class SuiteConfig(QuestionContainer, Enum):
ingest_obs_cf = QuestionList(
list_name="ingest_obs_cf",
questions=[
ingest_obs,
r2d2_ingest,
qd.start_cycle_point("2024-01-01T18:00:00Z"),
qd.final_cycle_point("2024-01-01T18:00:00Z"),
qd.model_components(['geos_cf']),
Expand All @@ -62,3 +62,23 @@ class SuiteConfig(QuestionContainer, Enum):
qd.dry_run(False),
]
)

ingest_background_cf = QuestionList(
list_name="ingest_background_cf",
questions=[
r2d2_ingest,
qd.start_cycle_point("2025-10-02T09:00:00Z"),
qd.final_cycle_point("2025-10-02T09:00:00Z"),
qd.cycle_times(['T09']),
qd.model_components(['geos_cf']),
qd.runahead_limit("P5"),
qd.ingest_background_pipeline(True),
],
geos_cf=[
qd.dry_run(True),
qd.background_source_path(),
qd.background_experiment(),
qd.horizontal_resolution(),
qd.store_as_symlink(True),
]
)
126 changes: 126 additions & 0 deletions src/swell/tasks/save_background.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# (C) Copyright 2021- United States Government as represented by the Administrator of the
Comment thread
ftgoktas marked this conversation as resolved.
# National Aeronautics and Space Administration. All Rights Reserved.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.


# --------------------------------------------------------------------------------------------------

from datetime import datetime as dt
from datetime import timedelta
import os
from r2d2 import store

from swell.tasks.base.task_base import taskBase
from swell.utilities.datetime_util import datetime_formats
from swell.utilities.r2d2 import load_r2d2_credentials

# --------------------------------------------------------------------------------------------------


class SaveBackground(taskBase):

def execute(self) -> None:
"""Ingest NRT background files into R2D2 as symlinks.

Designed for collections where background files already exist on a
shared filesystem and only need to be registered in R2D2 via symlinks
rather than copied. Currently used for the GEOS-CF JDI collection.

The collection contains 1-hourly instantaneous analysis files with a
single forecast run initialising at 09Z each day. Steps PT0H (valid
09Z) through PT23H (valid 08Z the following day) are ingested.

For every hourly step the source path is resolved by calling
``strftime`` on ``background_source_path``, the file is confirmed to
exist, and ``r2d2.store`` is called with ``store_as_symlink=True``.

Config keys (read from experiment YAML under the model component):

- ``background_source_path``: strftime path template, e.g.
``/css/gmao/geos-cf/NRTv2/priv/ana/Y%Y/M%m/D%d/
GEOS.cf.ana.jdi_inst_1hr_glo_C360x360x6_v72.%Y%m%d_%H%Mz.R0.nc4``
- ``background_experiment``: R2D2 experiment name (default ``geos_cf_v2``)
- ``horizontal_resolution``: R2D2 resolution string (default ``c360``)
- ``store_as_symlink``: if ``True`` (default), register files as symlinks
in R2D2 rather than copying them

The Cylc cycle point must be the forecast initialisation time,
e.g. ``2025-10-02T09:00:00Z``.
"""

# Load R2D2 credentials
load_r2d2_credentials(self.logger, self.platform())

dry_run = self.config.dry_run(True)
if dry_run:
self.logger.info('DRY RUN MODE - No files will be stored')

# Cycle time is the forecast initialisation time
forecast_start = dt.strptime(self.cycle_time(), datetime_formats['iso_format'])

model = self.get_model()
source_template = self.config.background_source_path()
experiment = self.config.background_experiment('geos_cf_v2')
resolution = self.config.horizontal_resolution('c360')
store_as_symlink = self.config.store_as_symlink(True)

stored = 0
skipped = 0

# 24 hourly steps: PT0H (valid at forecast_start) through PT23H
for hour_offset in range(24):
valid_time = forecast_start + timedelta(hours=hour_offset)
step = f'PT{hour_offset}H'

source_file = valid_time.strftime(source_template)

if not os.path.exists(source_file):
self.logger.warning(f'Background file not found, skipping: {source_file}')
skipped += 1
continue

if dry_run:
self.logger.info(
f' [DRY RUN] Would store step={step}: {os.path.basename(source_file)}')
stored += 1
continue

self.logger.info(f' Storing step={step}: {os.path.basename(source_file)}')

try:
store(
model=model,
item='forecast',
step=step,
experiment=experiment,
resolution=resolution,
date=forecast_start.strftime('%Y%m%d_%H%Mz'),
source_file=source_file,
file_extension='nc4',
file_type='bkg',
store_as_symlink=store_as_symlink,
)
except PermissionError as exc:
# R2D2 bug: after creating the symlink, file_util._set_permissions
# calls os.chmod which follows the symlink to the source file on
# the shared filesystem. Since we don't own that file, EPERM is
# raised, but the symlink and DB entry are both created successfully
# before the chmod. Verify the symlink before continuing.
# Only applies when store_as_symlink=True; a real copy never hits this.
r2d2_path = exc.filename
if (store_as_symlink
and r2d2_path
and os.path.islink(r2d2_path)
and os.readlink(r2d2_path) == source_file):
self.logger.warning(
f' chmod on symlink target raised PermissionError (R2D2 bug) '
f'— symlink verified: {os.path.basename(r2d2_path)} -> '
f'{os.path.basename(source_file)}')
else:
raise
stored += 1

verb = 'Would store' if dry_run else 'Stored'
self.logger.info(f'Background ingest complete: {verb} {stored} files, {skipped} skipped')
13 changes: 13 additions & 0 deletions src/swell/tasks/task_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,3 +875,16 @@ class TaskQuestions(QuestionContainer, Enum):
)

# --------------------------------------------------------------------------------------------------

SaveBackground = QuestionList(
list_name="SaveBackground",
questions=[
qd.dry_run(),
qd.background_source_path(),
qd.background_experiment(),
qd.horizontal_resolution(),
qd.store_as_symlink(),
]
)

# --------------------------------------------------------------------------------------------------
41 changes: 40 additions & 1 deletion src/swell/utilities/question_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,19 @@ class dry_run(TaskQuestion):

# --------------------------------------------------------------------------------------------------

@dataclass
class store_as_symlink(TaskQuestion):

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! One quick comment as I'm working on my marine ingest PR. Can you make this key get used for ingest_obs as well?

default_value: bool = True
question_name: str = "store_as_symlink"
ask_question: bool = True
models: List[str] = mutable_field([
"all_models"
])
prompt: str = "Store background files as symlinks in R2D2 instead of copying them?"
widget_type: WType = WType.BOOLEAN

# --------------------------------------------------------------------------------------------------

@dataclass
class obs_to_ingest(TaskQuestion):
default_value: list = mutable_field([])
Expand Down Expand Up @@ -1762,7 +1775,33 @@ class window_type(TaskQuestion):
prompt: str = "Do you want to use a 3D or 4D (including FGAT) window?"
widget_type: WType = WType.STRING_DROP_LIST

# --------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------

@dataclass
class background_source_path(TaskQuestion):
default_value: str = (
'/css/gmao/geos-cf/NRTv2/priv/ana/Y%Y/M%m/D%d/'
'GEOS.cf.ana.jdi_inst_1hr_glo_C360x360x6_v72.%Y%m%d_%H%Mz.R0.nc4'
)
question_name: str = "background_source_path"
ask_question: bool = True
models: List[str] = mutable_field(['geos_cf'])
prompt: str = ("Path template for background files. Uses Python strftime format codes, "
"e.g. Y%Y/M%m/D%d gives Y2025/M10/D02 and %Y%m%d_%H%Mz gives "
"20251002_0900z.")
widget_type: WType = WType.STRING

# --------------------------------------------------------------------------------------------------

@dataclass
class ingest_background_pipeline(SuiteQuestion):
default_value: bool = False
question_name: str = "ingest_background_pipeline"
ask_question: bool = False
prompt: str = "Run the SaveBackground task to ingest background files into R2D2?"
widget_type: WType = WType.BOOLEAN

# --------------------------------------------------------------------------------------------------
@dataclass
class download_convert_pipeline(SuiteQuestion):
default_value: bool = False
Expand Down
Loading