diff --git a/docs/misc/prune-example-interval.txt b/docs/misc/prune-example-interval.txt new file mode 100644 index 0000000000..53ce26cd51 --- /dev/null +++ b/docs/misc/prune-example-interval.txt @@ -0,0 +1,91 @@ +borg prune visualized (count and interval mixed) +================================================================================ + +Assume it is 2026-06-04 16:00. You have been creating backup archives at 16:00 +on most days going back to late 2025, with pruning running immediately after +each archival. Today's archive has just been made and the following prune +operation is about to start. + +This example shows what would be kept/pruned when running the following prune +command. Note the yearly rule keeping _any two_ yearly archives. + +borg prune \ + --since '2026-06-04 16:00' \ + --keep-daily 1w \ + --keep-monthly 5m \ + --keep-yearly 2 + +Archives kept by the `--keep-daily` rule are marked by a "d", +archives kept by the `--keep-monthly` rule are marked by an "m", and +archives kept by the `--keep-yearly` rule are marked by a "y" to the +right. + +The first archive was made on 2025-11-15. You missed the backups on 2026-03-31 +and 2026-06-03. 
+ + +Calendar view +------------- + 2025 + November December + 1 2 3 4 5 6 7 + 8 9 10 11 12 13 14 + 15y16 15 16 17 18 19 20 21 + 17 18 19 20 21 22 23 22 23 24 25 26 27 28 + 24 25 26 27 28 29 30 29 30 31y + + 2026 + January February March + 1 2 3 4 1 1 + 5 6 7 8 9 10 11 2 3 4 5 6 7 8 2 3 4 5 6 7 8 +12 13 14 15 16 17 18 9 10 11 12 13 14 15 9 10 11 12 13 14 15 +19 20 21 22 23 24 25 16 17 18 19 20 21 22 16 17 18 19 20 21 22 +26 27 28 29 30 31m 23 24 25 26 27 28m 23 24 25 26 27 28 29 + 30m31 + + April May June + 1 2 3 4 5 1 2 3 1d 2d 3 4d + 6 7 8 9 10 11 12 4 5 6 7 8 9 10 +13 14 15 16 17 18 19 11 12 13 14 15 16 17 +20 21 22 23 24 25 26 18 19 20 21 22 23 24 +27 28 29 30m 25 26 27 28 29d30d31d + + +List view +--------- + +--keep-daily 1w --keep-monthly 5m --keep-yearly 2 +-------------------------------------------------------------- +1. 2026-06-04 1. 2026-04-30 1. 2025-12-31 +2. 2026-06-02 2. 2026-03-30 2. 2025-11-15 (oldest) +3. 2026-06-01 3. 2026-02-28 +4. 2026-05-31 4. 2026-01-31 +5. 2026-05-30 +6. 2026-05-29 + + +Notes +----- + +2026-06-03 was skipped, so no archive on that day. No compensation is made for +this, so the "daily" rule simply keeps one fewer archive. 2026-05-28 16:00 is +exactly one week before `--since` and so would be excluded and pruned in this +prune run. + +2026-03-31 was skipped, so 2026-03-30 is the monthly candidate for that month. +2025-12-31 16:00 is exactly 5 months (5 * 31 days) from today and so that day's +archive is no longer kept by the "monthly" rule but instead is now kept as the +first true yearly candidate. + +Since interval rules define time windows rather than competing for a fixed +number of slots, their interplay is simpler than count-based rules. An archive +is kept by an interval rule as long as it falls within the specified window; +the next rule simply considers whatever remains. + +Intervals and counts can be mixed freely. Yearly retention in this example is +done by retention count instead of intervals. 
A count rule paired with interval +rules behaves just as it would if all preceding rules were also counts: +Archives already kept by earlier rules are excluded from consideration. In this +example there is only one "true" yearly candidate, so the oldest archive at +2025-11-15 is kept. This oldest archive will be kept until the rolling backup +scheme reaches "steady state" (when all retention rules are fully satisfied). diff --git a/docs/misc/prune-example.txt b/docs/misc/prune-example.txt index d77451a858..bdbe0fc594 100644 --- a/docs/misc/prune-example.txt +++ b/docs/misc/prune-example.txt @@ -1,18 +1,17 @@ borg prune visualized ===================== -Assume it is 2016-01-01. Today's backup has not yet been made. You have -created at least one backup on each day in 2015 except on 2015-12-19 (no +Assume it is 2016-01-01. Today's archive has not yet been made. You have +created at least one archive on each day in 2015 except on 2015-12-19 (no backup was made on that day), and you started backing up with Borg on 2015-01-01. This is what borg prune --keep-daily 14 --keep-monthly 6 --keep-yearly 1 would keep. -Backups kept by the --keep-daily rule are marked by a "d" to the right, -backups kept by the --keep-monthly rule are marked by a "m" to the right, -and backups kept by the --keep-yearly rule are marked by a "y" to the -right. +Archives kept by the --keep-daily rule are marked by a "d" to the right, +archives kept by the --keep-monthly rule are marked by a "m" to the right, and +archives kept by the --keep-yearly rule are marked by a "y" to the right. Calendar view ------------- @@ -71,7 +70,7 @@ List view 10. 2015-12-22 11. 2015-12-21 12. 2015-12-20 - (no backup made on 2015-12-19) + (no archive made on 2015-12-19) 13. 2015-12-18 14. 2015-12-17 @@ -82,18 +81,22 @@ Notes 2015-12-31 is kept due to the --keep-daily 14 rule (because it is applied first), not due to the --keep-monthly or --keep-yearly rule. 
-The --keep-yearly 1 rule does not consider the December 31st backup because it -has already been kept due to the daily rule. There are no backups available -from previous years, so the --keep-yearly target of 1 backup is not satisfied. -Because of this, the 2015-01-01 archive (the oldest archive available) is kept. +Rules are applied in the order given: archives already kept by an earlier +rule are excluded from consideration by later rules. + +The --keep-yearly 1 rule does not consider the December 31st archive because it +has already been kept due to the daily rule. There are no archives from +previous years, so there are no "true" yearly candidates. The oldest archive at +2015-01-01 fills the remaining slot and will be kept until the rolling backup +scheme reaches "steady state" (when all retention rules are fully satisfied). The --keep-monthly 6 rule keeps Nov, Oct, Sep, Aug, Jul and Jun. December is -not considered for this rule, because that backup was already kept because of +not considered for this rule, because that archive was already kept because of the daily rule. -2015-12-17 is kept to satisfy the --keep-daily 14 rule, because no backup was -made on 2015-12-19. If a backup had been made on that day, it would not keep -the one from 2015-12-17. +2015-12-17 is kept to satisfy the --keep-daily 14 rule, because no archive was +made on 2015-12-19. If an archive had been made on that day, it would not keep +the archive from 2015-12-17. We did not include weekly, hourly, minutely, or secondly rules to keep this example simple. They all work in basically the same way. @@ -101,26 +104,26 @@ example simple. They all work in basically the same way. The weekly rule is easy to understand roughly, but hard to understand in all details. If you are interested, read "ISO 8601:2000 standard week-based year". -The 13weekly and 3monthly rules are two different strategies for keeping one backup -every quarter of a year. 
There are `multiple ways` to define a quarter-year; -borg prune recognizes two: +The 13weekly and 3monthly rules are two different strategies for keeping one +archive every quarter of a year. There are `multiple ways` to define a +quarter-year; borg prune recognizes two: -* --keep-13weekly keeps one backup every 13 weeks using ISO 8601:2000's +* --keep-13weekly keeps one archive every 13 weeks using ISO 8601:2000's definition of the week-based year. January 4th is always included in the first week of a year, and January 1st to 3rd may be in week 52 or 53 of the previous year. Week 53 is also in the fourth quarter of the year. -* --keep-3monthly keeps one backup every 3 months. January 1st to - March 31, April 1st to June 30th, July 1st to September 30th, and October 1st - to December 31st form the quarters. +* --keep-3monthly keeps one archive every 3 months. January 1st to March 31, + April 1st to June 30th, July 1st to September 30th, and October 1st to + December 31st form the quarters. If the subtleties of the definition of a quarter-year don't matter to you, a short summary of behavior is: -* --keep-13weekly favors keeping backups at the beginning of Jan, Apr, Jul, +* --keep-13weekly favors keeping archives at the beginning of Jan, Apr, Jul, and Oct. -* --keep-3monthly favors keeping backups at the end of Dec, Mar, Jun, and Sep. -* Both strategies will have some overlap in which backups are kept. -* The differences are negligible unless backups considered for deletion were +* --keep-3monthly favors keeping archives at the end of Dec, Mar, Jun, and Sep. +* Both strategies will have some overlap in which archives are kept. +* The differences are negligible unless archives considered for deletion were created weekly or more frequently. .. 
_multiple ways: https://en.wikipedia.org/wiki/Calendar_year#Quarter_year diff --git a/docs/usage/general/date-time.rst.inc b/docs/usage/general/date-time.rst.inc index edd97c6a64..5496824a6e 100644 --- a/docs/usage/general/date-time.rst.inc +++ b/docs/usage/general/date-time.rst.inc @@ -10,9 +10,15 @@ Unless otherwise noted, we display local date and time. Internally, we store and process date and time as UTC. -.. rubric:: TIMESPAN +.. rubric:: TIMESPAN / INTERVAL -Some options accept a TIMESPAN parameter, which can be given as a number of -years (e.g. ``2y``), months (e.g. ``12m``), weeks (e.g. ``2w``), -days (e.g. ``7d``), hours (e.g. ``8H``), minutes (e.g. ``30M``), -or seconds (e.g. ``150S``). +Some options accept a TIMESPAN or an INTERVAL parameter, which can be given as +a number of years (e.g. ``2y``), months (e.g. ``12m``), weeks (e.g. ``2w``), +days (e.g. ``7d``), hours (e.g. ``8H``), minutes (e.g. ``30M``), or seconds +(e.g. ``150S``). + +The ``borg prune`` ``--keep-*`` retention options accept either a plain count +(e.g. ``--keep-daily 7``, keeping up to 7 daily archives) or a time interval +(e.g. ``--keep-daily 7d``, keeping one daily archive per day within a 7-day window). +When using interval-based retention, ``--since`` may be specified to set the +reference timestamp for the interval (defaults to the current time). diff --git a/docs/usage/prune.rst b/docs/usage/prune.rst index 74aeed49eb..5572ef922e 100644 --- a/docs/usage/prune.rst +++ b/docs/usage/prune.rst @@ -23,6 +23,13 @@ first, so you will see what it would do without it actually doing anything. Do not forget to run ``borg compact -v`` after prune to actually free disk space. +The ``--keep-*`` options accept either a **count** (e.g. ``--keep-daily 7``) or +a **time interval** (e.g. ``--keep-daily 7d``). A count keeps up to *N* archives +per period (e.g. the last 7 daily archives), while an interval keeps one +archive per period within that time span (e.g. 
one daily archive per day in the +last 7-day window). When using intervals, you may also specify ``--since`` to +set the reference timestamp for interval calculation. + :: # Keep 7 end of day and 4 additional end of week archives. @@ -44,8 +51,24 @@ Do not forget to run ``borg compact -v`` after prune to actually free disk space # and an end of month archive for every month: $ borg prune -v --list --keep-within=10d --keep-weekly=4 --keep-monthly=-1 -There is also a visualized prune example in ``docs/misc/prune-example.txt``: + # Keep daily archives from the last 7 days: + $ borg prune -v --list --dry-run --keep-daily=7d + + # Same as above, but with a fixed reference timestamp: + $ borg prune -v --list --dry-run --since 2025-12-01T00:00:00+02:00 --keep-daily=7d + + # Keep the last 14 archives using `--keep` (same as `--keep-last 14`): + $ borg prune -v --list --dry-run --keep 14 + + # Keep all archives from the last 30 days using `--keep` (same as `--keep-within 30d`): + $ borg prune -v --list --dry-run --keep 30d + +There are also visualized prune examples in ``docs/misc/prune-example.txt`` and +``docs/misc/prune-example-interval.txt``: .. highlight:: none .. include:: ../misc/prune-example.txt :literal: + +.. 
class PruningRule(NamedTuple):
    """A retention rule: its name plus the function that assigns archives to periods."""

    # Rule name; do_prune also uses it as the attribute name to read from the parsed args.
    key: str
    # Maps an archive (or a bare datetime) to the grouping key of its period.
    period_func: Callable[[ArchiveInfo | datetime], str]

    def __str__(self):
        return self.key


class KeepResult(NamedTuple):
    """Why an archive is kept: the responsible rule and the position within that rule's picks."""

    rule: PruningRule
    # Zero-based position among the archives kept by `rule`.
    idx: int
    # True if the archive was kept through the "keep oldest" fallback.
    oldest: bool = False

    def __str__(self):
        oldest_suffix = ", oldest=True" if self.oldest else ""
        return f"Keep(rule={self.rule}, idx={self.idx}{oldest_suffix})"


def archive_datetime_dispatch(func: Callable[[datetime], str]) -> Callable[[ArchiveInfo | datetime], str]:
    """
    Decorate a function of a datetime so that it also accepts an archive.

    A datetime argument is passed through unchanged; for an ArchiveInfo the
    function is called with the archive's ``ts``. Anything else raises TypeError.
    """

    @wraps(func)
    def dispatch(arg):
        # The datetime check comes first, so plain timestamps never touch ArchiveInfo.
        if isinstance(arg, datetime):
            moment = arg
        elif isinstance(arg, ArchiveInfo):
            moment = arg.ts
        else:
            raise TypeError(f"{func.__name__}(): expected datetime or Archive, got {type(arg).__name__}")
        return func(moment)

    return dispatch


# The *_period_func functions produce period grouping keys: archives that map
# to the same key fall into the same period, and only the latest archive (by
# creation timestamp) of each such group is kept. The returned keys MUST sort
# in the same order as the timestamps they were derived from.


def unique_period_func():
    """Return a period function that yields a fresh key on every call."""
    sequence = count()
    # Pad to the number of digits needed for MAX_ARCHIVES.
    width = math.ceil(math.log10(MAX_ARCHIVES))

    @archive_datetime_dispatch
    def unique_values(_dt):
        """Return the next zero-padded counter value, so every archive forms its own group"""
        serial = next(sequence)
        return f"{serial}".zfill(width)

    return unique_values


def pattern_period_func(pattern):
    """Return a period function that formats the creation timestamp with the strftime `pattern`."""

    @archive_datetime_dispatch
    def inner(dt):
        """Return the local-time strftime rendering of the timestamp as the group key"""
        local_dt = dt.astimezone()  # compute in local timezone
        return local_dt.strftime(pattern)

    return inner
@archive_datetime_dispatch
def quarterly_13weekly_period_func(dt):
    """Return "<ISO year>-<quarter>" where quarters are 13-week blocks of the ISO-8601 week-based year"""
    iso = dt.astimezone().isocalendar()  # local time
    iso_year, iso_week = iso[0], iso[1]
    quarter = (iso_week - 1) // 13
    # Clamp to 0..3 so that week 53 still lands in the last quarter.
    quarter = min(max(quarter, 0), 3)
    return f"{iso_year}-{quarter:02}"


@archive_datetime_dispatch
def quarterly_3monthly_period_func(dt):
    """Return "<year>-<quarter>" where quarters are the calendar blocks Jan-Mar, Apr-Jun, Jul-Sep, Oct-Dec"""
    local_dt = dt.astimezone()  # local time
    quarter = (local_dt.month - 1) // 3
    return f"{local_dt.year}-{quarter:02}"
def prune(
    archives: list[ArchiveInfo],
    rule: PruningRule,
    n_or_interval: int | timedelta,
    since_timestamp: datetime | None,
    keep_oldest: bool,
    previously_kept: dict[ArchiveInfo, KeepResult] | None = None,
) -> dict[ArchiveInfo, KeepResult]:
    """
    Select the archives that `rule` keeps out of `archives`.

    :param archives: candidate archives. The code walks the list front to back,
        stops at the first archive outside the retention target and treats
        ``archives[-1]`` as the oldest one, so the list is expected to be
        ordered newest first (do_prune passes it that way).
    :param rule: the rule whose ``period_func`` groups archives into periods;
        at most the first archive seen per period is kept.
    :param n_or_interval: either a count (``-1`` means unlimited) or a
        timedelta window measured backwards from `since_timestamp`. A zero
        count or a zero-length interval keeps nothing.
    :param since_timestamp: reference time for interval-based retention;
        ignored for counts.
    :param keep_oldest: if true, additionally keep the last archive of the
        list when it is not kept yet and the retention target still allows it.
    :param previously_kept: archives already kept by earlier rules. They are
        never selected again. The mapping is only read, never modified.
    :return: the archives newly kept by this call, mapped to a KeepResult
        whose ``idx`` is the zero-based position among this call's picks.
    :raises ValueError: for an interval without a `since_timestamp`.
    """
    # Use None as the default instead of a shared mutable dict.
    if previously_kept is None:
        previously_kept = {}

    if not archives or n_or_interval in (0, timedelta(0)):
        return {}

    if isinstance(n_or_interval, int):
        n, earliest_timestamp = n_or_interval, None
    else:
        if since_timestamp is None:
            raise ValueError("since_timestamp is required when using interval-based pruning")
        n, earliest_timestamp = None, since_timestamp - n_or_interval

    keep: dict[ArchiveInfo, KeepResult] = {}

    def can_retain(a):
        # Count mode: there is still room (or no limit). Interval mode: the
        # archive is strictly newer than the start of the window.
        if n is not None:
            return n == -1 or len(keep) < n
        return a.ts > earliest_timestamp

    prev_period = None
    for archive in archives:
        if not can_retain(archive):
            break
        period = rule.period_func(archive)
        if period != prev_period:
            # First archive seen in a new period: it represents that period,
            # even if it is skipped below because an earlier rule kept it.
            prev_period = period
            if archive not in keep and archive not in previously_kept:
                keep[archive] = KeepResult(rule=rule, idx=len(keep))

    if keep_oldest:
        # Keep oldest archive if we didn't reach the target retention.
        oldest_archive = archives[-1]
        if oldest_archive not in keep and oldest_archive not in previously_kept and can_retain(oldest_archive):
            keep[oldest_archive] = KeepResult(rule=rule, idx=len(keep), oldest=True)

    return keep
+ self._validate_prune_args(args) + + match = [args.name] if args.name else args.match_archives + archives = manifest.archives.list(match=match, sort_by=["ts"], reverse=True) + archives = [ai for ai in archives if "@PROT" not in ai.tags] + + # Archives to keep along with the rule that ensured them being kept + keep = {} + + since = getattr(args, PRUNE_SINCE.key) + candidate_archives = archives + + if since is not None: + # Prefilter: Archives from _after_ the `prune_since` time are skipped entirely. + for archive in archives: + if archive.ts <= since: + break + keep[archive] = KeepResult(rule=PRUNE_SINCE, idx=len(keep)) + candidate_archives = archives[len(keep) :] + + # Apply each retention rule to all candidate archives. The + # `previously_kept` parameter prevents later (coarser-grained) rules + # from double-counting archives already retained by earlier rules. + active_rules = [ + (rule, getattr(args, rule.key)) for rule in PRUNING_RULES if getattr(args, rule.key) is not None + ] + for rule, n_or_interval in active_rules: + keep |= prune( + archives=candidate_archives, + rule=rule, + n_or_interval=n_or_interval, + since_timestamp=(since if since is not None else datetime.now().astimezone()), + keep_oldest=( + rule == active_rules[-1][0] + ), # Activate keep_oldest rule only for the largest active interval + previously_kept=keep, ) + archives_to_prune = set(archives) - set(keep) + if args.format is not None: format = args.format elif args.short: @@ -148,38 +225,17 @@ def do_prune(self, args, repository, manifest): format = os.environ.get("BORG_PRUNE_FORMAT", "{archive:<36} {time} [{id}]") formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec) - match = [args.name] if args.name else args.match_archives - archives = manifest.archives.list(match=match, sort_by=["ts"], reverse=True) - archives = [ai for ai in archives if "@PROT" not in ai.tags] - - keep = [] - # collect the rule responsible for the keeping of each archive in this dict 
- # keys are archive ids, values are a tuple - # (, ) - kept_because = {} - - # find archives which need to be kept because of the keep-within rule - if args.within: - keep += prune_within(archives, args.within, kept_because) - - # find archives which need to be kept because of the various time period rules - for rule in PRUNING_PATTERNS.keys(): - num = getattr(args, rule, None) - if num is not None: - keep += prune_split(archives, rule, num, kept_because) - - to_delete = set(archives) - set(keep) - if not args.json: - logger.info("Repository contains %d archives.", manifest.archives.count()) - logger.info("Applying rules to the matching %d archives...", len(archives)) - logger.info("Keeping %d archives, pruning %d archives.", len(keep), len(to_delete)) if args.json: output_data = [] + else: + logger.info("Repository contains %d archives.", manifest.archives.count()) + logger.info("Applying rules to the matching %d archives...", len(archives)) + logger.info("Keeping %d archives, pruning %d archives.", len(keep), len(archives_to_prune)) + list_logger = logging.getLogger("borg.output.list") # set up counters for the progress display - to_delete_len = len(to_delete) - archives_deleted = 0 - pi = ProgressIndicatorPercent(total=len(to_delete), msg="Pruning archives %3.0f%%", msgid="prune") + num_archives_deleted = 0 + pi = ProgressIndicatorPercent(total=len(archives_to_prune), msg="Pruning archives %3.0f%%", msgid="prune") for archive_info in archives: if sig_int and sig_int.action_done(): break @@ -189,49 +245,94 @@ def do_prune(self, args, repository, manifest): archive_data = formatter.get_item_data(archive_info, jsonline=True) else: archive_formatted = formatter.format_item(archive_info, jsonline=False) - if archive_info in to_delete: + if archive_info in archives_to_prune: if not args.json: pi.show() - archives_deleted += 1 + num_archives_deleted += 1 if args.dry_run: log_message = "Would prune:" else: - log_message = "Pruning archive (%d/%d):" % (archives_deleted, 
to_delete_len) + log_message = f"Pruning archive ({num_archives_deleted}/{len(archives_to_prune)}):" manifest.archives.delete_by_id(archive_info.id) if args.json: archive_data["kept"] = False - archive_data["deleted_archive_number"] = archives_deleted + archive_data["deleted_archive_number"] = num_archives_deleted else: - rule, num = kept_because[archive_info.id] - log_message = "Keeping archive (rule: {rule} #{num}):".format(rule=rule, num=num) + result = keep[archive_info] + result_message = f"{result.rule.key}{'[oldest]' if result.oldest else ''} #{result.idx + 1}" + log_message = f"Keeping archive (rule: {result_message}):" if args.json: archive_data["kept"] = True - archive_data["keep_rule"] = rule - archive_data["kept_archive_number"] = num + archive_data["keep_rule"] = result.rule.key + archive_data["kept_oldest"] = result.oldest + archive_data["kept_archive_number"] = result.idx + 1 if args.json: if ( args.output_list or not (args.list_pruned or args.list_kept) - or (args.list_pruned and archive_info in to_delete) - or (args.list_kept and archive_info not in to_delete) + or (args.list_pruned and archive_info in archives_to_prune) + or (args.list_kept and archive_info not in archives_to_prune) ): output_data.append(archive_data) elif ( args.output_list - or (args.list_pruned and archive_info in to_delete) - or (args.list_kept and archive_info not in to_delete) + or (args.list_pruned and archive_info in archives_to_prune) + or (args.list_kept and archive_info not in archives_to_prune) ): list_logger.info(f"{log_message:<44} {archive_formatted}") if not args.json: pi.finish() if args.json: json_print(basic_json_data(manifest, extra={"archives": output_data})) - if archives_deleted > 0 and not args.dry_run: + if num_archives_deleted > 0 and not args.dry_run: manifest.write() self.print_warning('Done. 
def _validate_prune_args(self, args):
    """
    Reject missing or self-defeating combinations of the ``--keep*`` options.

    Only options that are set (not None) on `args` are considered, in the
    order of PRUNING_RULES.

    :param args: parsed arguments carrying one attribute per rule key.
    :raises CommandError: if no keep option is given; if "keep" is combined
        with "last" or "within"; if, among interval-like values (a timedelta
        or ``-1``, ignoring "last"), an earlier rule is unlimited or its
        window is at least as long as that of a later rule; or if, among
        integer values, an unlimited (``-1``) rule is followed by another one.
    """
    keep_args = {}
    for rule in PRUNING_RULES:
        value = getattr(args, rule.key)
        if value is not None:
            keep_args[rule.key] = value

    if not keep_args:
        raise CommandError(
            'At least one of the "keep", "keep-within", "keep-last", '
            '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
            '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
            'or "keep-yearly" settings must be specified.'
        )

    if PRUNE_KEEP.key in keep_args and PRUNE_LAST.key in keep_args:
        raise CommandError('Only one of the "keep" and "last" settings may be specified.')

    if PRUNE_KEEP.key in keep_args and PRUNE_WITHIN.key in keep_args:
        raise CommandError('Only one of the "keep" and "within" settings may be specified.')

    def lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val):
        return (
            f"The combination of \"{lo_arg}='{lo_val}'\" and \"{hi_arg}='{hi_val}'\" is invalid. It is effectively "
            f"useless since every archive matched by {hi_arg} would have already been matched by {lo_arg} and may "
            "have lead to undefined behavior were it allowed."
        )

    prune_keys = {rule.key for rule in PRUNING_RULES if rule != PRUNE_LAST}
    interval_args = [
        (arg, val)
        for arg, val in keep_args.items()
        if arg in prune_keys and (isinstance(val, timedelta) or val == -1)
    ]
    for (lo_arg, lo_val), (hi_arg, hi_val) in combinations(interval_args, 2):
        # An unlimited earlier rule leaves nothing for any later rule.
        if lo_val == -1:
            raise CommandError(lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val))
        # An unlimited later rule (-1) always reaches further back than a
        # finite window, so it is valid. It must also not be compared with a
        # timedelta, because that raises TypeError.
        if hi_val != -1 and lo_val >= hi_val:
            raise CommandError(lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val))

    # Every key in keep_args comes from PRUNING_RULES, so filtering by value type is enough.
    int_args = [(arg, val) for arg, val in keep_args.items() if isinstance(val, int)]
    for (lo_arg, lo_val), (hi_arg, hi_val) in combinations(int_args, 2):
        if lo_val == -1:
            raise CommandError(lo_hi_mismatch_errmsg(lo_arg, lo_val, hi_arg, hi_val))
- Specifying a negative number of archives to keep means that there is no limit. - - Borg will retain the oldest archive if any of the secondly, minutely, hourly, - daily, weekly, monthly, quarterly, or yearly rules was not otherwise able to - meet its retention target. This enables the first chronological archive to - continue aging until it is replaced by a newer archive that meets the retention - criteria. + The ``--keep`` option is the simplest way to specify a basic retention + policy. It accepts a count or a time interval for retention (e.g. + ``10`` or ``7d``, ``4w``). With a count it keeps at most that many + recent archives; with an interval it keeps all archives created within + that time window. When ``--since`` is given together with an interval + retention, the interval is measured backwards from that timestamp + instead of from the current time. See ``Date and Time`` docs for exact + INTERVAL format. + + The ``--keep-last N`` and ``--keep-within INTERVAL`` options are + alternatives with equivalent functionality to ``--keep`` with a count + or interval respectively. ``--keep`` cannot be used together with + ``--keep-last`` or ``--keep-within``. + + The ``--keep-secondly``, ``--keep-minutely``, ``--keep-hourly``, + ``--keep-daily``, ``--keep-weekly``, ``--keep-monthly``, + ``--keep-13weekly``, ``--keep-3monthly``, and ``--keep-yearly`` options + specify time period retention policies. They accept either a count N for + retention or a time interval INTERVAL for retention, same as for ``--keep``. + With a retention count, they keep at most that many archives (one per + period, e.g. one per day or one per month until the retention count is + met). With a retention interval, they keep one archive per period + within that time span (e.g. at most one per day in a span of seven + days, even if some days had none) -- measured from ``--since`` if given, + otherwise from the current time. Specifying a count of ``-1`` (or the + word ``all``) means no limit. 
A zero count or zero-length interval + keeps nothing. + + The ``--since`` option restricts pruning to archives older than the given + TIMESTAMP. Archives newer than this timestamp are kept unconditionally + as a pre-filter. When ``--since`` is used together with interval-based + ``--keep-*`` options (e.g. ``--keep-daily 7d``), the interval is + measured backwards from the given timestamp rather than from the + current time. Count-based retention is unaffected. The ``--keep-13weekly`` and ``--keep-3monthly`` rules are two different strategies for keeping archives every quarter year. - The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will - keep the last N archives under the assumption that you do not create more than one - backup archive in the same second). + The oldest archive is always kept. This is useful for rolling tiered backup + schemes, where the earliest backup in a retention window should survive until + the next tier's interval naturally replaces it. + + When using interval-based pruning with multiple ``--keep-*`` options, + the intervals must be specified in increasing order of coarseness. + For example, ``--keep-daily 7d --keep-weekly 4w`` is valid, but + ``--keep-daily 30d --keep-weekly 7d`` is not, because the weekly + interval is already covered by the daily one. + + + A practical approach for recurring backups is to use rules + with increasing coarseness so that most of the recent history is kept and + older history gradually thins out with time. For example, + ``--keep-daily 7d --keep-weekly 4w --keep-monthly 6`` keeps an + archive per day for the past week, per week for the past month, and + one per month for six months after that. Combine this with ``--since`` + to align time windows to calendar boundaries rather than to the exact + moment you run prune, which makes coarser rules behave more predictably: + ``--keep-daily 7d --keep-weekly 4w --since $(date +%F)``. + + Count-based retention keeps archives less bound to time.
For instance, + ``--keep-yearly 3`` retains 3 yearly archives however far back they + span and ``--keep-daily 20`` keeps 20 archives no matter if you missed + a week in between. This can be useful for less regular archive + creation, or if your use case does not map well to specific time + intervals, or if you simply prefer to think of archive retention in + numbers rather than intervals. + + For count-based retention, backups selected by more granular rules do + not count towards those of coarser rules. ``--keep 3 --keep-monthly 2`` + will first keep the 3 latest archives and then keep 2 monthly archives, + skipping ones that were already kept by ``--keep 3``. + + The time that each archive creation started is used to match archives + to pruning periods. Dates and times are interpreted in the local + timezone of your system. Weeks go from Monday to Sunday. + You can influence how the ``--list`` output is formatted by using the ``--short`` option (less wide output) or by giving a custom format using ``--format`` (see @@ -329,90 +479,98 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): "but keys used in it are added to the JSON output. " "Some keys are always present. 
Note: JSON can only represent text.", ) + subparser.add_argument( + "--since", + metavar="TIMESTAMP", + dest=PRUNE_SINCE.key, + type=timestamp, + action=Highlander, + help="only consider archives older than this for pruning", + ) subparser.add_argument( "--keep-within", metavar="INTERVAL", - dest="within", + dest=PRUNE_WITHIN.key, type=interval, action=Highlander, help="keep all archives within this time interval", ) subparser.add_argument( - "--keep-last", + "--keep-last", dest=PRUNE_LAST.key, type=int, action=Highlander, help="number of archives to keep" + ) + subparser.add_argument( + "--keep", + dest=PRUNE_KEEP.key, + type=int_or_interval, + action=Highlander, + help="number or time interval of archives to keep", + ) + subparser.add_argument( "--keep-secondly", - dest="secondly", - type=int, - default=0, + dest=PRUNE_SECONDLY.key, + type=int_or_interval, action=Highlander, - help="number of secondly archives to keep", + help="number or time interval of secondly archives to keep", ) subparser.add_argument( "--keep-minutely", - dest="minutely", - type=int, - default=0, + dest=PRUNE_MINUTELY.key, + type=int_or_interval, action=Highlander, - help="number of minutely archives to keep", + help="number or time interval of minutely archives to keep", ) subparser.add_argument( "-H", "--keep-hourly", - dest="hourly", - type=int, - default=0, + dest=PRUNE_HOURLY.key, + type=int_or_interval, action=Highlander, - help="number of hourly archives to keep", + help="number or time interval of hourly archives to keep", ) subparser.add_argument( "-d", "--keep-daily", - dest="daily", - type=int, - default=0, + dest=PRUNE_DAILY.key, + type=int_or_interval, action=Highlander, - help="number of daily archives to keep", + help="number or time interval of daily archives to keep", ) subparser.add_argument( "-w", "--keep-weekly", - dest="weekly", - type=int, - default=0, + dest=PRUNE_WEEKLY.key, + type=int_or_interval, action=Highlander, - help="number of weekly archives to keep", + 
help="number or time interval of weekly archives to keep", ) subparser.add_argument( "-m", "--keep-monthly", - dest="monthly", - type=int, - default=0, + dest=PRUNE_MONTHLY.key, + type=int_or_interval, action=Highlander, - help="number of monthly archives to keep", + help="number or time interval of monthly archives to keep", ) quarterly_group = subparser.add_mutually_exclusive_group() quarterly_group.add_argument( "--keep-13weekly", - dest="quarterly_13weekly", - type=int, - default=0, - help="number of quarterly archives to keep (13 week strategy)", + dest=PRUNE_QUARTERLY_13WEEKLY.key, + type=int_or_interval, + help="number or time interval of quarterly archives to keep (13 week strategy)", ) quarterly_group.add_argument( "--keep-3monthly", - dest="quarterly_3monthly", - type=int, - default=0, - help="number of quarterly archives to keep (3 month strategy)", + dest=PRUNE_QUARTERLY_3MONTHLY.key, + type=int_or_interval, + help="number or time interval of quarterly archives to keep (3 month strategy)", ) subparser.add_argument( "-y", "--keep-yearly", - dest="yearly", - type=int, - default=0, + dest=PRUNE_YEARLY.key, + type=int_or_interval, action=Highlander, - help="number of yearly archives to keep", + help="number or time interval of yearly archives to keep", ) define_archive_filters_group(subparser, sort_by=False, first_last=False) subparser.add_argument( diff --git a/src/borg/helpers/__init__.py b/src/borg/helpers/__init__.py index 12db71b2ac..1f21618f69 100644 --- a/src/borg/helpers/__init__.py +++ b/src/borg/helpers/__init__.py @@ -27,7 +27,7 @@ from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated, ErrorIgnoringTextIOWrapper from .parseformat import octal_int, bin_to_hex, hex_to_bin, safe_encode, safe_decode from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd -from .parseformat import eval_escapes, decode_dict, interval +from .parseformat import eval_escapes, decode_dict, interval, int_or_interval from 
.parseformat import ( PathSpec, FilesystemPathSpec, diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index 38762a485a..2ab6832848 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -12,7 +12,7 @@ from pathlib import Path from typing import ClassVar, Any, TYPE_CHECKING, Literal from collections import OrderedDict -from datetime import datetime, timezone +from datetime import datetime, timezone, timedelta from functools import partial from hashlib import sha256 from string import Formatter @@ -129,9 +129,14 @@ def decode_dict(d, keys, encoding="utf-8", errors="surrogateescape"): def interval(s): - """Convert a string representing a valid interval to a number of seconds.""" - if isinstance(s, int): + """Parse an interval string (e.g. ``7d``, ``2w``, ``30M``) into a timedelta. + + Supported units: y (years, 365d), m (months, 31d), w (weeks), d (days), + H (hours), M (minutes), S (seconds). The value must be nonnegative. + """ + if isinstance(s, timedelta): return s + seconds_in_a_minute = 60 seconds_in_an_hour = 60 * seconds_in_a_minute seconds_in_a_day = 24 * seconds_in_an_hour @@ -159,10 +164,37 @@ def interval(s): except ValueError: seconds = -1 - if seconds <= 0: - raise ArgumentTypeError(f'Invalid number "{number}": expected positive integer') + if seconds < 0: + raise ArgumentTypeError(f'Invalid number "{number}": expected nonnegative integer') + + return timedelta(seconds=seconds) + + +def int_or_interval(s): + """Parse *s* as an :class:`int` or, failing that, as an interval string. + + Returns :class:`int` if *s* can be parsed as an integer (e.g. ``"7"``), + or :class:`datetime.timedelta` if *s* is a valid interval (e.g. ``"7d"``). - return seconds + Raises :class:`ArgumentTypeError` if *s* is neither an integer nor + a valid interval. + """ + if isinstance(s, (int, timedelta)): + return s + + # Explicitly check 'all' as a shortcut to 'infinite' sentinel value `-1`. 
+ if s == "all": + return -1 + + try: + return int(s) + except ValueError: + pass + + try: + return interval(s) + except ArgumentTypeError as e: + raise ArgumentTypeError(f"Value is neither an integer nor an interval: {e}") class CompressionSpec: diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index 14420a7a5d..45d987c0ba 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -1,26 +1,35 @@ import json +import pytest import re +from operator import attrgetter from datetime import datetime, timezone, timedelta - -import pytest - from ...constants import * # NOQA -from ...archiver.prune_cmd import prune_split, prune_within +from ...archiver.prune_cmd import ( + prune, + PRUNE_DAILY, + PRUNE_HOURLY, + PRUNE_MINUTELY, + PRUNE_MONTHLY, + PRUNE_SECONDLY, + PRUNE_WEEKLY, + PRUNE_WITHIN, + PRUNE_YEARLY, +) +from ...helpers import CommandError, interval +from ...manifest import ArchiveInfo from . 
import cmd, RK_ENCRYPTION, generate_archiver_tests -from ...helpers import interval pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA -def _create_archive_ts(archiver, backup_files, name, y, m, d, H=0, M=0, S=0): - cmd( - archiver, - "create", - "--timestamp", - datetime(y, m, d, H, M, S, 0).strftime(ISO_FORMAT_NO_USECS), # naive == local time / local tz - name, - backup_files, - ) +def _create_archive_dt(archiver, backup_files, name, dt, tzinfo=timezone.utc): + if dt.tzinfo is None: + dt = dt.replace(tzinfo=tzinfo) + cmd(archiver, "create", "--timestamp", dt.isoformat(timespec="microseconds"), name, backup_files) + + +def _create_archive_ts(archiver, backup_files, name, y, m, d, H=0, M=0, S=0, us=0, tzinfo=timezone.utc): + _create_archive_dt(archiver, backup_files, name, datetime(y, m, d, H, M, S, us, tzinfo=tzinfo)) def test_prune_repository(archivers, request, backup_files): @@ -35,7 +44,8 @@ def test_prune_repository(archivers, request, backup_files): output = cmd(archiver, "repo-list") assert "test1" in output assert "test2" in output - cmd(archiver, "prune", "--keep-daily=1") + output = cmd(archiver, "prune", "--list", "--keep-daily=1") + assert re.search(r"Pruning archive \(1/1\):\s+test1", output) output = cmd(archiver, "repo-list") assert "test1" not in output # The latest archive must still be there: @@ -105,6 +115,67 @@ def test_prune_repository_example(archivers, request, backup_files): assert "test%02d" % i not in output +# This test must match docs/misc/prune-example-interval.txt +def test_prune_repository_example_interval(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + + # All timestamps are at exactly 16:00 UTC. + # This models the example: backups on most days from 2025-11-15 to + # 2026-06-04, with skips on 2026-03-31 and 2026-06-03. Of these only + # 2026-05-28 should be pruned after today's pruning. 
+ archive_dates = [ + (2025, 11, 15), + (2025, 12, 31), + (2026, 1, 31), + (2026, 2, 28), + (2026, 3, 30), + (2026, 4, 30), + (2026, 5, 28), + (2026, 5, 29), + (2026, 5, 30), + (2026, 5, 31), + (2026, 6, 1), + (2026, 6, 2), + (2026, 6, 4), + ] + + names = [f"backup_{y:04d}-{m:02d}-{d:02d}" for y, m, d in archive_dates] + for (y, m, d), name in zip(archive_dates, names): + _create_archive_ts(archiver, backup_files, name, y, m, d) + + output = cmd( + archiver, + "prune", + "--list", + "--dry-run", + "--since=2026-06-04T16:00:00+00:00", + "--keep-daily=1w", + "--keep-monthly=5m", + "--keep-yearly=2", + ) + + daily_kept = [ + "backup_2026-06-04", + "backup_2026-06-02", + "backup_2026-06-01", + "backup_2026-05-31", + "backup_2026-05-30", + "backup_2026-05-29", + ] + for i, name in enumerate(daily_kept, 1): + assert re.search(rf"Keeping archive \(rule: daily #{i}\):\s+{name}", output) + + monthly_kept = ["backup_2026-04-30", "backup_2026-03-30", "backup_2026-02-28", "backup_2026-01-31"] + for i, name in enumerate(monthly_kept, 1): + assert re.search(rf"Keeping archive \(rule: monthly #{i}\):\s+{name}", output) + + assert re.search(r"Keeping archive \(rule: yearly #1\):\s+backup_2025-12-31", output) + assert re.search(r"Keeping archive \(rule: yearly\[oldest\] #2\):\s+backup_2025-11-15", output) + + assert re.search(r"Would prune:\s+backup_2026-05-28", output) + + def test_prune_quarterly(archivers, request, backup_files): # Example worked through by hand when developing the quarterly # strategy, based on existing backups where the quarterly strategy @@ -257,150 +328,133 @@ def test_prune_ignore_protected(archivers, request): cmd(archiver, "create", "archive3", archiver.input_path) output = cmd(archiver, "prune", "--list", "--keep-last=1", "--match-archives=sh:archive*") assert "archive1" not in output # @PROT archives are completely ignored. 
- assert re.search(r"Keeping archive \(rule: secondly #1\):\s+archive3", output) + assert re.search(r"Keeping archive \(rule: last #1\):\s+archive3", output) assert re.search(r"Pruning archive \(.*?\):\s+archive2", output) output = cmd(archiver, "repo-list") assert "archive1" in output # @PROT protected archive1 from deletion assert "archive3" in output # last one -class MockArchive: - def __init__(self, ts, id): - self.ts = ts - self.id = id - - def __repr__(self): - return f"{self.id}: {self.ts.isoformat()}" +mock_id = 0 -# This is the local timezone of the system running the tests. -# We need this e.g. to construct archive timestamps for the prune tests, -# because borg prune operates in the local timezone (it first converts the -# archive timestamp to the local timezone). So, if we want the y/m/d/h/m/s -# values which prune uses to be exactly the ones we give [and NOT shift them -# by tzoffset], we need to give the timestamps in the same local timezone. -# Please note that the timestamps in a real borg archive or manifest are -# stored in UTC timezone. 
-local_tz = datetime.now(tz=timezone.utc).astimezone(tz=None).tzinfo +def mock_archive(ts, id=None): + """Create an ArchiveInfo with mocked/default values.""" + global mock_id + if id is None: + id = mock_id + mock_id += 1 + return ArchiveInfo(name="", id=id, ts=ts.replace(tzinfo=timezone.utc), tags=(), host="", user="") def test_prune_within(): - def subset(lst, indices): - return {lst[i] for i in indices} - - def dotest(test_archives, within, indices): - for ta in test_archives, reversed(test_archives): - kept_because = {} - keep = prune_within(ta, interval(within), kept_because) - assert set(keep) == subset(test_archives, indices) - assert all("within" == kept_because[a.id][0] for a in keep) - - # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours - test_offsets = [60, 90 * 60, 150 * 60, 210 * 60, 25 * 60 * 60, 49 * 60 * 60] + test_deltas = [ + timedelta(minutes=1), + timedelta(hours=1.5), + timedelta(hours=2.5), + timedelta(hours=3.5), + timedelta(hours=25), + timedelta(hours=49), + ] now = datetime.now(timezone.utc) - test_dates = [now - timedelta(seconds=s) for s in test_offsets] - test_archives = [MockArchive(date, i) for i, date in enumerate(test_dates)] - - dotest(test_archives, "15S", []) - dotest(test_archives, "2M", [0]) - dotest(test_archives, "1H", [0]) - dotest(test_archives, "2H", [0, 1]) - dotest(test_archives, "3H", [0, 1, 2]) - dotest(test_archives, "24H", [0, 1, 2, 3]) - dotest(test_archives, "26H", [0, 1, 2, 3, 4]) - dotest(test_archives, "2d", [0, 1, 2, 3, 4]) - dotest(test_archives, "50H", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "3d", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "1w", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "1m", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "1y", [0, 1, 2, 3, 4, 5]) + test_dates = [now - d for d in test_deltas] + test_archives = [mock_archive(date) for date in test_dates] + + def dotest(within, indices): + keep = prune(test_archives, PRUNE_WITHIN, interval(within), now, False) + assert 
set(keep) == {test_archives[i] for i in indices} + assert all(keep[a].rule.key == "within" for a in keep) + + dotest("15S", []) + dotest("2M", [0]) + dotest("1H", [0]) + dotest("2H", [0, 1]) + dotest("3H", [0, 1, 2]) + dotest("24H", [0, 1, 2, 3]) + dotest("26H", [0, 1, 2, 3, 4]) + dotest("2d", [0, 1, 2, 3, 4]) + dotest("50H", [0, 1, 2, 3, 4, 5]) + dotest("3d", [0, 1, 2, 3, 4, 5]) + dotest("1w", [0, 1, 2, 3, 4, 5]) + dotest("1m", [0, 1, 2, 3, 4, 5]) + dotest("1y", [0, 1, 2, 3, 4, 5]) @pytest.mark.parametrize( - "rule,num_to_keep,expected_ids", + "rule,num_to_keep,expected_indices", [ - ("yearly", 3, (13, 2, 1)), - ("monthly", 3, (13, 8, 4)), - ("weekly", 2, (13, 8)), - ("daily", 3, (13, 8, 7)), - ("hourly", 3, (13, 10, 8)), - ("minutely", 3, (13, 10, 9)), - ("secondly", 4, (13, 12, 11, 10)), - ("daily", 0, []), + (PRUNE_YEARLY, 3, (12, 1, 0)), + (PRUNE_MONTHLY, 3, (12, 7, 3)), + (PRUNE_WEEKLY, 2, (12, 7)), + (PRUNE_DAILY, 3, (12, 7, 6)), + (PRUNE_HOURLY, 3, (12, 9, 7)), + (PRUNE_MINUTELY, 3, (12, 9, 8)), + (PRUNE_SECONDLY, 4, (12, 11, 10, 9)), + (PRUNE_DAILY, 0, []), + (PRUNE_DAILY, -1, (12, 7, 6, 5, 4, 3, 2, 1, 0)), ], ) -def test_prune_split(rule, num_to_keep, expected_ids): - def subset(lst, ids): - return {i for i in lst if i.id in ids} - +def test_prune(rule, num_to_keep, expected_indices): archives = [ # years apart - MockArchive(datetime(2015, 1, 1, 10, 0, 0, tzinfo=local_tz), 1), - MockArchive(datetime(2016, 1, 1, 10, 0, 0, tzinfo=local_tz), 2), - MockArchive(datetime(2017, 1, 1, 10, 0, 0, tzinfo=local_tz), 3), + mock_archive(datetime(2015, 1, 1, 10, 0, 0)), + mock_archive(datetime(2016, 1, 1, 10, 0, 0)), + mock_archive(datetime(2017, 1, 1, 10, 0, 0)), # months apart - MockArchive(datetime(2017, 2, 1, 10, 0, 0, tzinfo=local_tz), 4), - MockArchive(datetime(2017, 3, 1, 10, 0, 0, tzinfo=local_tz), 5), + mock_archive(datetime(2017, 2, 1, 10, 0, 0)), + mock_archive(datetime(2017, 3, 1, 10, 0, 0)), # days apart - MockArchive(datetime(2017, 3, 2, 10, 0, 0, 
tzinfo=local_tz), 6), - MockArchive(datetime(2017, 3, 3, 10, 0, 0, tzinfo=local_tz), 7), - MockArchive(datetime(2017, 3, 4, 10, 0, 0, tzinfo=local_tz), 8), + mock_archive(datetime(2017, 3, 2, 10, 0, 0)), + mock_archive(datetime(2017, 3, 3, 10, 0, 0)), + mock_archive(datetime(2017, 3, 4, 10, 0, 0)), # minutes apart - MockArchive(datetime(2017, 10, 1, 9, 45, 0, tzinfo=local_tz), 9), - MockArchive(datetime(2017, 10, 1, 9, 55, 0, tzinfo=local_tz), 10), + mock_archive(datetime(2017, 10, 1, 9, 45, 0)), + mock_archive(datetime(2017, 10, 1, 9, 55, 0)), # seconds apart - MockArchive(datetime(2017, 10, 1, 10, 0, 1, tzinfo=local_tz), 11), - MockArchive(datetime(2017, 10, 1, 10, 0, 3, tzinfo=local_tz), 12), - MockArchive(datetime(2017, 10, 1, 10, 0, 5, tzinfo=local_tz), 13), + mock_archive(datetime(2017, 10, 1, 10, 0, 1)), + mock_archive(datetime(2017, 10, 1, 10, 0, 3)), + mock_archive(datetime(2017, 10, 1, 10, 0, 5)), ] - kept_because = {} - keep = prune_split(archives, rule, num_to_keep, kept_because) - - assert set(keep) == subset(archives, expected_ids) - for item in keep: - assert kept_because[item.id][0] == rule + keep = prune(sorted(archives, key=attrgetter("ts"), reverse=True), rule, num_to_keep, None, False) + assert set(keep) == {archives[i] for i in expected_indices} + assert all(result.rule == rule for _, result in keep.items()) -def test_prune_split_keep_oldest(): - def subset(lst, ids): - return {i for i in lst if i.id in ids} +def test_prune_keep_oldest(): archives = [ # oldest backup, but not last in its year - MockArchive(datetime(2018, 1, 1, 10, 0, 0, tzinfo=local_tz), 1), + mock_archive(datetime(2018, 1, 1, 10, 0, 0)), # an interim backup - MockArchive(datetime(2018, 12, 30, 10, 0, 0, tzinfo=local_tz), 2), + mock_archive(datetime(2018, 12, 30, 10, 0, 0)), # year-end backups - MockArchive(datetime(2018, 12, 31, 10, 0, 0, tzinfo=local_tz), 3), - MockArchive(datetime(2019, 12, 31, 10, 0, 0, tzinfo=local_tz), 4), + mock_archive(datetime(2018, 12, 31, 10, 0, 0)), 
+ mock_archive(datetime(2019, 12, 31, 10, 0, 0)), ] + sorted_archives = sorted(archives, key=attrgetter("ts"), reverse=True) # Keep oldest when retention target can't otherwise be met - kept_because = {} - keep = prune_split(archives, "yearly", 3, kept_because) + keep = prune(sorted_archives, PRUNE_YEARLY, 3, None, True) - assert set(keep) == subset(archives, [1, 3, 4]) - assert kept_because[1][0] == "yearly[oldest]" - assert kept_because[3][0] == "yearly" - assert kept_because[4][0] == "yearly" + assert keep[archives[0]].rule.key == "yearly" and keep[archives[0]].oldest is True + assert keep[archives[2]].rule.key == "yearly" and keep[archives[2]].oldest is False + assert keep[archives[3]].rule.key == "yearly" and keep[archives[3]].oldest is False + assert len(keep) == 3 # Otherwise, prune it - kept_because = {} - keep = prune_split(archives, "yearly", 2, kept_because) + keep = prune(sorted_archives, PRUNE_YEARLY, 2, None, True) - assert set(keep) == subset(archives, [3, 4]) - assert kept_because[3][0] == "yearly" - assert kept_because[4][0] == "yearly" + assert keep[archives[2]].rule.key == "yearly" and keep[archives[2]].oldest is False + assert keep[archives[3]].rule.key == "yearly" and keep[archives[3]].oldest is False + assert len(keep) == 2 -def test_prune_split_no_archives(): +def test_prune_no_archives(): archives = [] - kept_because = {} - keep = prune_split(archives, "yearly", 3, kept_because) + keep = prune(archives, PRUNE_YEARLY, 3, None, False) - assert keep == [] - assert kept_because == {} + assert keep == {} def test_prune_list_with_metadata_format(archivers, request, backup_files): @@ -437,6 +491,7 @@ def test_prune_json(archivers, request, backup_files): assert kept[0]["name"] == "test2" assert kept[0]["keep_rule"] == "daily" assert kept[0]["kept_archive_number"] == 1 + assert not kept[0]["kept_oldest"] assert "deleted_archive_number" not in kept[0] assert pruned[0]["name"] == "test1" assert pruned[0]["deleted_archive_number"] == 1 @@ -460,3 
+515,336 @@ def test_prune_json_list_pruned(archivers, request, backup_files): assert archives[0]["name"] == "test1" assert archives[0]["kept"] is False assert archives[0]["deleted_archive_number"] == 1 + + +def test_prune_keep_last_same_second(archivers, request, backup_files): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + cmd(archiver, "create", "test1", backup_files) + cmd(archiver, "create", "test2", backup_files) + output = cmd(archiver, "prune", "--list", "--dry-run", "--keep-last=2") + # Both archives are kept even though they have the same timestamp to the second. Would previously have failed with + # old behavior of --keep-last. Archives sorted on seconds, order is undefined. + assert re.search(r"Keeping archive \(rule: last #\d\):\s+test1", output) + assert re.search(r"Keeping archive \(rule: last #\d\):\s+test2", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep=2", "--keep=1S"]) +def test_prune_keep_int_or_interval(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt) + _create_archive_dt(archiver, backup_files, "test-2", dt) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(seconds=1)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep=0", "--keep=0S"]) +def test_prune_keep_int_or_interval_zero(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 23, 59, 59, 
tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test", dt) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Would prune:\s+test", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-daily=-1", "--keep-daily=all"]) +def test_prune_keep_all(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=1)) + _create_archive_dt(archiver, backup_files, "test-2", dt - timedelta(days=2)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=3)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output) + assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-3", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-secondly=2", "--keep-secondly=2S"]) +def test_prune_keep_secondly_int_or_interval(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(seconds=1)) + _create_archive_dt(archiver, backup_files, "test-2", dt - timedelta(seconds=1, microseconds=999999)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(seconds=2)) + _create_archive_dt(archiver, backup_files, "test-4", dt - timedelta(seconds=2, microseconds=1)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: secondly #1\):\s+test-1", output) + assert re.search(r"Keeping 
archive \(rule: secondly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Would prune:\s+test-4", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-minutely=3", "--keep-minutely=3M"]) +def test_prune_keep_minutely_int_or_interval(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 23, 59, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(minutes=1)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(minutes=2)).replace(second=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(minutes=2)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(minutes=3)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(minutes=3)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: minutely #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: minutely #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: minutely #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-hourly=3", "--keep-hourly=3H"]) +def test_prune_keep_hourly_int_or_interval(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 23, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(hours=1)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(hours=2)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(hours=2)) + 
_create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(hours=3)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(hours=3)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: hourly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: hourly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: hourly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-daily=3", "--keep-daily=3d"]) +def test_prune_keep_daily_int_or_interval(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=1)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=2)).replace(second=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=2)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=3)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=3)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-weekly=3", "--keep-weekly=3w"]) +def test_prune_keep_weekly_int_or_interval(archivers, request, backup_files, keep_arg): + archiver = 
request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=7)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=14)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=14)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=21)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=21)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: weekly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: weekly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: weekly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-monthly=3", "--keep-monthly=3m"]) +def test_prune_keep_monthly_int_or_interval(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=31)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=62)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=62)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=93)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=93)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: monthly #1\):\s+test-1", output) + assert 
re.search(r"Keeping archive \(rule: monthly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: monthly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +# 2023-12-31 is Sunday, week 52. Makes these week calculations a little easier. +@pytest.mark.parametrize("keep_arg", ["--keep-13weekly=3", "--keep-13weekly=39w"]) +def test_prune_keep_13weekly_int_or_interval(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=91)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=182)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=182)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=273)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=273)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-3monthly=3", "--keep-3monthly=275d"]) +def test_prune_keep_3monthly_int_or_interval(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - 
timedelta(days=92)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=184)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=184)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=275)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=275)) + # 275d is the interval from dt back to the oldest archive kept by the 3monthly rule + output = cmd(archiver, "prune", "--list", "--short", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@pytest.mark.parametrize("keep_arg", ["--keep-yearly=3", "--keep-yearly=3y"]) +def test_prune_keep_yearly_int_or_interval(archivers, request, backup_files, keep_arg): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + dt = datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc) + _create_archive_dt(archiver, backup_files, "test-1", dt - timedelta(days=365)) + _create_archive_dt(archiver, backup_files, "test-2", (dt - timedelta(days=730)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-3", dt - timedelta(days=730)) + _create_archive_dt(archiver, backup_files, "test-4", (dt - timedelta(days=1095)).replace(microsecond=1)) + _create_archive_dt(archiver, backup_files, "test-5", dt - timedelta(days=1095)) + output = cmd(archiver, "prune", "--list", "--dry-run", "--since", dt.isoformat(), keep_arg) + assert re.search(r"Keeping archive \(rule: yearly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: yearly #2\):\s+test-2", output) + assert re.search(r"Would
prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: yearly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +def _cmd_prune_error(archiver, *args): + """Run prune expecting a CommandError. Returns error string for assertions.""" + if archiver.FORK_DEFAULT: + return cmd(archiver, "prune", *args, exit_code=CommandError().exit_code) + with pytest.raises(CommandError) as error: + cmd(archiver, "prune", *args) + return str(error.value) + + +def test_prune_no_args(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + output = _cmd_prune_error(archiver) + assert re.search(r"At least one of the .* settings must be specified.", output) + assert re.search(r"keep(?!-)", output) + flags = [ + "last", + "within", + "secondly", + "minutely", + "hourly", + "daily", + "weekly", + "monthly", + "yearly", + "13weekly", + "3monthly", + ] + for flag in flags: + assert f"keep-{flag}" in output + + +def test_prune_errors_on_keep_and_last(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + output = _cmd_prune_error(archiver, "--dry-run", "--keep-last=5", "--keep=3") + assert 'Only one of the "keep" and "last" settings may be specified.' in output + + +def test_prune_errors_on_keep_and_within(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + output = _cmd_prune_error(archiver, "--dry-run", "--keep-within=7d", "--keep=3") + assert 'Only one of the "keep" and "within" settings may be specified.' 
in output + + +@pytest.mark.parametrize( + "lo_val,hi_val", + [("14d", "7d"), ("-1", "7d"), ("-1", "1"), ("-1", "-1"), ("all", "7d"), ("all", "1"), ("all", "-1")], +) +def test_prune_warns_on_redundant_interval_flags(archivers, request, lo_val, hi_val): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + output = _cmd_prune_error(archiver, "--dry-run", f"--keep-hourly={lo_val}", f"--keep-daily={hi_val}") + assert "hourly=" in output + assert "daily=" in output + assert "effectively useless" in output + + +def test_prune_int_rolling_schedule_oldest_retention(): + daily_n = 6 + monthly_n = 3 + start_date = datetime(2024, 1, 1, tzinfo=timezone.utc) + + previous_archives = [] + archives = [] + + for day_offset in range(97): + backup_ts = start_date + timedelta(days=day_offset) + previous_archives = archives + archives = [mock_archive(backup_ts, day_offset), *archives] + + keep = {} + keep |= prune(archives, PRUNE_DAILY, daily_n, None, False, keep) + keep |= prune(archives, PRUNE_MONTHLY, monthly_n, None, True, keep) + + archives = sorted(keep.keys(), key=lambda a: a.ts, reverse=True) + + # It is now 2024-04-06. The last run should have just pruned the jan-01 + # archive since the monthly retention count is now satisfied at jan-31. It + # was kept until now to satisfy the oldest-rule. 
+ + assert previous_archives[-1].ts.strftime("%m-%d") == "01-01" + assert archives[-1].ts.strftime("%m-%d") == "01-31" + + +def test_prune_interval_rolling_schedule_oldest_retention(): + daily_interval = timedelta(days=6) + monthly_interval = timedelta(days=31 * 3) # Matching --keep-monthly=3m after argument parsing + start_date = datetime(2024, 1, 1, tzinfo=timezone.utc) + + previous_archives = [] + archives = [] + + for day_offset in range(94): + backup_ts = start_date + timedelta(days=day_offset) + previous_archives = archives + archives = [mock_archive(backup_ts, day_offset), *archives] + + keep = {} + keep |= prune(archives, PRUNE_DAILY, daily_interval, backup_ts, False, keep) + keep |= prune(archives, PRUNE_MONTHLY, monthly_interval, backup_ts, True, keep) + + print( + f"For backup+prune at {backup_ts.strftime('%m-%d')} ({day_offset})" + f" the following {len(archives)} archives are kept:" + ) + for a, result in keep.items(): + print(f" {a.id}: {a.ts.strftime('%Y-%m-%d')} {result}") + + archives = sorted(keep.keys(), key=lambda a: a.ts, reverse=True) + + # It is now 2024-04-03. The last run should have just pruned the jan-01 + # archive since it now falls outside the retention range (_exactly_ 93 days + # or 3 months ago, timestamp compared exclusively). It was kept until now + # to satisfy the oldest-rule. 
+ + assert previous_archives[-1].ts.strftime("%m-%d") == "01-01" + assert archives[-1].ts.strftime("%m-%d") == "01-31" diff --git a/src/borg/testsuite/helpers/parseformat_test.py b/src/borg/testsuite/helpers/parseformat_test.py index c9cc1b5d59..37dbbb921e 100644 --- a/src/borg/testsuite/helpers/parseformat_test.py +++ b/src/borg/testsuite/helpers/parseformat_test.py @@ -1,7 +1,8 @@ import base64 import os -from datetime import datetime, timezone +import re +from datetime import datetime, timedelta, timezone import pytest @@ -17,6 +18,7 @@ format_file_size, parse_file_size, interval, + int_or_interval, partial_format, clean_lines, format_line, @@ -388,13 +390,14 @@ def test_format_timedelta(): @pytest.mark.parametrize( "timeframe, num_secs", [ - ("5S", 5), - ("2M", 2 * 60), - ("1H", 60 * 60), - ("1d", 24 * 60 * 60), - ("1w", 7 * 24 * 60 * 60), - ("1m", 31 * 24 * 60 * 60), - ("1y", 365 * 24 * 60 * 60), + ("0S", timedelta(seconds=0)), + ("5S", timedelta(seconds=5)), + ("2M", timedelta(minutes=2)), + ("1H", timedelta(hours=1)), + ("1d", timedelta(days=1)), + ("1w", timedelta(days=7)), + ("1m", timedelta(days=31)), + ("1y", timedelta(days=365)), ], ) def test_interval(timeframe, num_secs): @@ -404,9 +407,9 @@ def test_interval(timeframe, num_secs): @pytest.mark.parametrize( "invalid_interval, error_tuple", [ - ("H", ('Invalid number "": expected positive integer',)), - ("-1d", ('Invalid number "-1": expected positive integer',)), - ("food", ('Invalid number "foo": expected positive integer',)), + ("H", ('Invalid number "": expected nonnegative integer',)), + ("-1d", ('Invalid number "-1": expected nonnegative integer',)), + ("food", ('Invalid number "foo": expected nonnegative integer',)), ], ) def test_interval_time_unit(invalid_interval, error_tuple): @@ -415,10 +418,56 @@ def test_interval_time_unit(invalid_interval, error_tuple): assert exc.value.args == error_tuple -def test_interval_number(): +@pytest.mark.parametrize( + "invalid_input, error_regex", + [ + ("x", 
r'^Unexpected time unit "x": choose from'), + ("-1t", r'^Unexpected time unit "t": choose from'), + ("fool", r'^Unexpected time unit "l": choose from'), + ("abc", r'^Unexpected time unit "c": choose from'), + (" abc ", r'^Unexpected time unit " ": choose from'), + ], +) +def test_interval_invalid_time_format(invalid_input, error_regex): + with pytest.raises(ArgumentTypeError) as exc: + interval(invalid_input) + assert re.search(error_regex, exc.value.args[0]) + + +@pytest.mark.parametrize( + "input, result", + [ + ("0", 0), + ("5", 5), + (" 999 ", 999), + ("-1", -1), + ("all", -1), + ("0S", timedelta(seconds=0)), + ("5S", timedelta(seconds=5)), + ("1m", timedelta(days=31)), + # already-converted values (jsonargparse idempotency) + (0, 0), + (5, 5), + (timedelta(seconds=5), timedelta(seconds=5)), + (timedelta(days=31), timedelta(days=31)), + ], +) +def test_int_or_interval(input, result): + assert int_or_interval(input) == result + + +@pytest.mark.parametrize( + "invalid_input, error_regex", + [ + ("H", r"Value is neither an integer nor an interval:"), + ("-1d", r"Value is neither an integer nor an interval:"), + ("food", r"Value is neither an integer nor an interval:"), + ], +) +def test_int_or_interval_time_unit(invalid_input, error_regex): with pytest.raises(ArgumentTypeError) as exc: - interval("5") - assert exc.value.args == ('Unexpected time unit "5": choose from y, m, w, d, H, M, S',) + int_or_interval(invalid_input) + assert re.search(error_regex, exc.value.args[0]) def test_parse_timestamp():