Skip to content

Commit afc33de

Browse files
committed
Add branch delta cache support (restore-delta, save-delta)
Introduces a delta workflow for PR builds that layers a branch-specific bundle on top of the main-branch base cache. After a base restore, save-delta scans for files with mtime newer than a restore marker and uploads a cumulative delta keyed by branch name (survives rebases/force-pushes). On the next build, restore --branch downloads both bundles concurrently so the delta's network latency is hidden behind the base extraction. Also adds a bundleStore abstraction over S3 and cachew, consolidates extract_linux.go into extract_default.go, and adds tests and benchmarks for the parallel mtime scanner (4× faster than filepath.Walk on cold APFS for 212k files: 3.4s vs 14s).
1 parent 75673a2 commit afc33de

6 files changed

Lines changed: 936 additions & 139 deletions

File tree

README.md

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
A CLI tool for saving and restoring Gradle build cache bundles from S3.
44

5-
Bundles are stored in S3 keyed by commit SHA, so `restore` doesn't need to know
5+
Bundles are stored keyed by commit SHA, so `restore` doesn't need to know
66
exactly which commit produced a given bundle. Instead, it walks the local git
77
history from a given ref (default: `HEAD`) and tries each commit SHA in order,
8-
newest first, until it finds a bundle that exists in S3. This means a developer
8+
newest first, until it finds a bundle that exists. This means a developer
99
on a feature branch will automatically get the bundle from the most recent
1010
main-branch commit that has one, without needing to know its SHA in advance.
1111

@@ -23,6 +23,8 @@ This installs the latest release to `~/.local/bin`. Set `INSTALL_DIR` to overrid
2323

2424
## Usage
2525

26+
### Base cache (main branch)
27+
2628
```
2729
gradle-cache restore --bucket <bucket> --cache-key <key> [--ref main]
2830
gradle-cache save --bucket <bucket> --cache-key <key>
@@ -37,8 +39,27 @@ are archived alongside `$GRADLE_USER_HOME/caches`. Accepts a direct path
3739
(`buildSrc`, `build-logic`) or a glob (`plugins/*`) to include all
3840
subdirectories. Defaults to `buildSrc`.
3941

40-
Credentials are resolved via the standard AWS credential chain (environment
41-
variables, IRSA, instance profiles, etc.).
42+
### Branch delta cache (PR branches)
43+
44+
For PR builds, pass `--branch` to `restore` to apply a branch delta in the same invocation. The delta bundle is downloaded concurrently with the base extraction so it adds no extra latency:
45+
46+
```sh
47+
# Restore phase (single invocation)
48+
gradle-cache restore --bucket <bucket> --cache-key <key> --ref main --branch $BRANCH_NAME
49+
50+
# ... run the Gradle build ...
51+
52+
# Save phase
53+
gradle-cache save-delta --bucket <bucket> --cache-key <key> --branch $BRANCH_NAME
54+
```
55+
56+
After the build, `save-delta` scans for files created or modified since the restore marker (by comparing mtimes) and uploads a cumulative delta bundle keyed by branch name — so it survives rebases and force-pushes without any extra bookkeeping.
57+
58+
If you need to apply a delta separately (e.g. the base was already restored by another step), `restore-delta` is still available as a standalone subcommand.
59+
60+
### Credentials
61+
62+
S3 credentials are resolved via the standard AWS credential chain (environment variables, IRSA, instance profiles, etc.).
4263

4364
## License
4465

cmd/gradle-cache/cachew.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io"
7+
"net/http"
8+
"net/url"
9+
"strings"
10+
11+
"github.com/alecthomas/errors"
12+
)
13+
14+
// cachewClient stores and retrieves Gradle cache bundles via cachew's generic
// object API, removing the need for AWS credentials on the client.
type cachewClient struct {
	baseURL string       // server base URL; trailing slashes stripped at construction
	http *http.Client // shared client; zero-value config — NOTE(review): no request timeout, confirm intended for large streamed bundles
}
20+
21+
func newCachewClient(baseURL string) *cachewClient {
22+
return &cachewClient{
23+
baseURL: strings.TrimRight(baseURL, "/"),
24+
http: &http.Client{},
25+
}
26+
}
27+
28+
func (c *cachewClient) objectURL(commit, cacheKey string) string {
29+
return fmt.Sprintf("%s/api/v1/object/%s/%s",
30+
c.baseURL,
31+
url.PathEscape(cacheKey),
32+
url.PathEscape(commit),
33+
)
34+
}
35+
36+
// stat returns (0, nil) when the bundle exists. Size is not used by this
37+
// backend since cachew does not expose Content-Length on HEAD and parallel
38+
// range downloads are handled server-side.
39+
func (c *cachewClient) stat(ctx context.Context, commit, cacheKey string) (int64, error) {
40+
req, err := http.NewRequestWithContext(ctx, http.MethodHead, c.objectURL(commit, cacheKey), nil)
41+
if err != nil {
42+
return 0, err
43+
}
44+
resp, err := c.http.Do(req) //nolint:gosec
45+
if err != nil {
46+
return 0, err
47+
}
48+
io.Copy(io.Discard, resp.Body) //nolint:errcheck,gosec
49+
resp.Body.Close() //nolint:errcheck,gosec
50+
if resp.StatusCode == http.StatusNotFound {
51+
return 0, errors.Errorf("cachew: not found for %.8s", commit)
52+
}
53+
if resp.StatusCode != http.StatusOK {
54+
return 0, errors.Errorf("cachew HEAD %.8s: status %d", commit, resp.StatusCode)
55+
}
56+
return 0, nil
57+
}
58+
59+
func (c *cachewClient) get(ctx context.Context, commit, cacheKey string, _ int64) (io.ReadCloser, error) {
60+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.objectURL(commit, cacheKey), nil)
61+
if err != nil {
62+
return nil, err
63+
}
64+
resp, err := c.http.Do(req) //nolint:gosec
65+
if err != nil {
66+
return nil, err
67+
}
68+
if resp.StatusCode != http.StatusOK {
69+
msg, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
70+
resp.Body.Close() //nolint:errcheck,gosec
71+
return nil, errors.Errorf("cachew GET %.8s: status %d: %s", commit, resp.StatusCode, msg)
72+
}
73+
return resp.Body, nil
74+
}
75+
76+
func (c *cachewClient) put(ctx context.Context, commit, cacheKey string, r io.ReadSeeker, size int64) error {
77+
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.objectURL(commit, cacheKey), r)
78+
if err != nil {
79+
return err
80+
}
81+
req.ContentLength = size
82+
req.Header.Set("Content-Type", "application/zstd")
83+
req.Header.Set("Time-To-Live", "168h") // 7 days
84+
resp, err := c.http.Do(req) //nolint:gosec
85+
if err != nil {
86+
return err
87+
}
88+
msg, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
89+
resp.Body.Close() //nolint:errcheck,gosec
90+
if resp.StatusCode != http.StatusOK {
91+
return errors.Errorf("cachew POST %.8s: status %d: %s", commit, resp.StatusCode, msg)
92+
}
93+
return nil
94+
}

cmd/gradle-cache/extract_default.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build !darwin && !linux
1+
//go:build !darwin
22

33
package main
44

@@ -12,16 +12,30 @@ import (
1212
"github.com/alecthomas/errors"
1313
)
1414

15-
// extractTarPlatform uses sequential extraction on non-darwin platforms. The
// Linux VFS writeback path coalesces dirty pages most efficiently when a single
// writer produces sequential writes, matching the behaviour of GNU tar. Parallel
// writes from multiple goroutines fragment the writeback queue and are
// consistently slower on Linux ext4/xfs despite identical throughput on APFS.
func extractTarPlatform(r io.Reader, dir string) error {
	return extractTarSeq(r, dir)
}
1923

24+
// extractTarSeq extracts a tar stream sequentially using a fixed-size copy
25+
// buffer. Files are streamed directly from the tar reader to disk one 1 MiB
26+
// block at a time — the same block-streaming pattern GNU tar uses — so the
27+
// decompressor pipe keeps flowing without large per-file allocations.
2028
func extractTarSeq(r io.Reader, dir string) error {
29+
// Single fixed-size copy buffer for all file writes in this call.
30+
// 1 MiB is large enough to amortise write syscall overhead without
31+
// creating memory pressure for many-file archives.
2132
copyBuf := make([]byte, 1<<20)
33+
2234
tr := tar.NewReader(r)
2335
cleanDir := filepath.Clean(dir) + string(os.PathSeparator)
2436

37+
// createdDirs tracks parent directories we have already MkdirAll'd so
38+
// each unique path is only created once (same optimisation as darwin).
2539
createdDirs := make(map[string]struct{})
2640
ensureDir := func(d string) error {
2741
if _, ok := createdDirs[d]; ok {
@@ -53,6 +67,7 @@ func extractTarSeq(r io.Reader, dir string) error {
5367
if err := ensureDir(target); err != nil {
5468
return errors.Errorf("mkdir %s: %w", hdr.Name, err)
5569
}
70+
5671
case tar.TypeReg:
5772
if err := ensureDir(filepath.Dir(target)); err != nil {
5873
return errors.Errorf("mkdir %s: %w", hdr.Name, err)
@@ -68,6 +83,7 @@ func extractTarSeq(r io.Reader, dir string) error {
6883
if err := f.Close(); err != nil {
6984
return errors.Errorf("close %s: %w", hdr.Name, err)
7085
}
86+
7187
case tar.TypeSymlink:
7288
if err := ensureDir(filepath.Dir(target)); err != nil {
7389
return errors.Errorf("mkdir for symlink %s: %w", hdr.Name, err)

cmd/gradle-cache/extract_linux.go

Lines changed: 0 additions & 95 deletions
This file was deleted.

0 commit comments

Comments
 (0)