Skip to content

Commit d3bf16f

Browse files
Extract bundle directly to final destinations, no staging dir or symlinks
Replace the tmpDir → rename/symlink approach with prefix-aware routing that extracts tar entries directly to their final locations. Bundle entries are routed by their first path component: ./caches/... → GRADLE_USER_HOME/caches/... ./configuration-cache/... → <project>/.gradle/configuration-cache/... everything else → <project>/... (buildSrc/build, plugins/*/build, …) Existing files are skipped (skipExisting=true) so a partial pre-existing cache is merged rather than overwritten. This eliminates the cc-keystore lock contention issue that arose when /tmp used a different filesystem than ~/.gradle, and removes all the tmpDir/rename/symlink/isExdev machinery. 🤖 Generated with [Firebender](https://firebender.com) Co-Authored-By: Firebender <help@firebender.com>
1 parent bdd485b commit d3bf16f

4 files changed

Lines changed: 179 additions & 207 deletions

File tree

cmd/gradle-cache/extract_darwin.go

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,16 @@ const mmapThreshold = 64 * 1024
3333
// handles concurrent writes to independent files efficiently, so parallel
3434
// workers saturate IOPS better than a single sequential writer would.
3535
func extractTarPlatform(r io.Reader, dir string) error {
36-
return extractTarGo(r, dir)
36+
return extractTarGoRouted(r, func(name string) string {
37+
return filepath.Join(dir, name)
38+
}, false)
39+
}
40+
41+
// extractTarPlatformRouted is the routing-aware variant of extractTarPlatform.
42+
// targetFn maps a cleaned tar entry name to its absolute destination path.
43+
// If skipExisting is true, files that already exist on disk are left untouched.
44+
func extractTarPlatformRouted(r io.Reader, targetFn func(string) string, skipExisting bool) error {
45+
return extractTarGoRouted(r, targetFn, skipExisting)
3746
}
3847

3948
// extractTarGo reads a tar stream and extracts it into dir using a Go worker
@@ -49,6 +58,15 @@ func extractTarPlatform(r io.Reader, dir string) error {
4958
// allocates one contiguous extent; the Mach VM subsystem then writes pages
5059
// lazily via fault-in rather than blocking on each write() call.
5160
func extractTarGo(r io.Reader, dir string) error {
61+
return extractTarGoRouted(r, func(name string) string {
62+
return filepath.Join(dir, name)
63+
}, false)
64+
}
65+
66+
// extractTarGoRouted is the core parallel extractor. targetFn maps a cleaned
67+
// tar entry name (e.g. "caches/8.14.3/foo") to its absolute destination path.
68+
// skipExisting skips writing files that already exist on disk.
69+
func extractTarGoRouted(r io.Reader, targetFn func(string) string, skipExisting bool) error {
5270
type fileJob struct {
5371
path string
5472
mode os.FileMode
@@ -93,7 +111,6 @@ func extractTarGo(r io.Reader, dir string) error {
93111
}
94112

95113
tr := tar.NewReader(r)
96-
cleanDir := filepath.Clean(dir) + string(os.PathSeparator)
97114
var readErr error
98115
readLoop:
99116
for {
@@ -106,12 +123,15 @@ readLoop:
106123
break
107124
}
108125

109-
target := filepath.Join(dir, hdr.Name)
110-
if !strings.HasPrefix(filepath.Clean(target)+string(os.PathSeparator), cleanDir) {
126+
// Reject path traversal before passing to targetFn.
127+
name := filepath.Clean(hdr.Name)
128+
if strings.HasPrefix(name, "..") {
111129
readErr = errors.Errorf("tar entry %q escapes destination directory", hdr.Name)
112130
break
113131
}
114132

133+
target := targetFn(name)
134+
115135
switch hdr.Typeflag {
116136
case tar.TypeDir:
117137
if err := ensureDir(target); err != nil {
@@ -120,6 +140,11 @@ readLoop:
120140
}
121141

122142
case tar.TypeReg:
143+
if skipExisting {
144+
if _, err := os.Lstat(target); err == nil {
145+
continue
146+
}
147+
}
123148
if err := ensureDir(filepath.Dir(target)); err != nil {
124149
readErr = errors.Errorf("mkdir %s: %w", hdr.Name, err)
125150
break readLoop
@@ -138,6 +163,11 @@ readLoop:
138163
jobs <- fileJob{path: target, mode: hdr.FileInfo().Mode(), buf: bufPtr}
139164

140165
case tar.TypeSymlink:
166+
if skipExisting {
167+
if _, err := os.Lstat(target); err == nil {
168+
continue
169+
}
170+
}
141171
if err := ensureDir(filepath.Dir(target)); err != nil {
142172
readErr = errors.Errorf("mkdir for symlink %s: %w", hdr.Name, err)
143173
break readLoop

cmd/gradle-cache/extract_default.go

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,38 @@ import (
1818
// writes from multiple goroutines fragment the writeback queue and are
1919
// consistently slower on Linux ext4/xfs despite identical throughput on APFS.
2020
func extractTarPlatform(r io.Reader, dir string) error {
21-
return extractTarSeq(r, dir)
21+
return extractTarSeqRouted(r, func(name string) string {
22+
return filepath.Join(dir, name)
23+
}, false)
24+
}
25+
26+
// extractTarPlatformRouted is the routing-aware variant of extractTarPlatform.
27+
// targetFn maps a cleaned tar entry name to its absolute destination path.
28+
// If skipExisting is true, files that already exist on disk are left untouched.
29+
func extractTarPlatformRouted(r io.Reader, targetFn func(string) string, skipExisting bool) error {
30+
return extractTarSeqRouted(r, targetFn, skipExisting)
2231
}
2332

2433
// extractTarSeq extracts a tar stream sequentially using a fixed-size copy
2534
// buffer. Files are streamed directly from the tar reader to disk one 1 MiB
2635
// block at a time — the same block-streaming pattern GNU tar uses — so the
2736
// decompressor pipe keeps flowing without large per-file allocations.
2837
func extractTarSeq(r io.Reader, dir string) error {
38+
return extractTarSeqRouted(r, func(name string) string {
39+
return filepath.Join(dir, name)
40+
}, false)
41+
}
42+
43+
// extractTarSeqRouted is the core sequential extractor. targetFn maps a
44+
// cleaned tar entry name (e.g. "caches/8.14.3/foo") to its absolute
45+
// destination path. skipExisting skips writing files that already exist.
46+
func extractTarSeqRouted(r io.Reader, targetFn func(string) string, skipExisting bool) error {
2947
// Single fixed-size copy buffer for all file writes in this call.
3048
// 1 MiB is large enough to amortise write syscall overhead without
3149
// creating memory pressure for many-file archives.
3250
copyBuf := make([]byte, 1<<20)
3351

3452
tr := tar.NewReader(r)
35-
cleanDir := filepath.Clean(dir) + string(os.PathSeparator)
3653

3754
// createdDirs tracks parent directories we have already MkdirAll'd so
3855
// each unique path is only created once (same optimisation as darwin).
@@ -57,18 +74,26 @@ func extractTarSeq(r io.Reader, dir string) error {
5774
return errors.Wrap(err, "read tar entry")
5875
}
5976

60-
target := filepath.Join(dir, hdr.Name)
61-
if !strings.HasPrefix(filepath.Clean(target)+string(os.PathSeparator), cleanDir) {
77+
// Reject path traversal before passing to targetFn.
78+
name := filepath.Clean(hdr.Name)
79+
if strings.HasPrefix(name, "..") {
6280
return errors.Errorf("tar entry %q escapes destination directory", hdr.Name)
6381
}
6482

83+
target := targetFn(name)
84+
6585
switch hdr.Typeflag {
6686
case tar.TypeDir:
6787
if err := ensureDir(target); err != nil {
6888
return errors.Errorf("mkdir %s: %w", hdr.Name, err)
6989
}
7090

7191
case tar.TypeReg:
92+
if skipExisting {
93+
if _, err := os.Lstat(target); err == nil {
94+
continue
95+
}
96+
}
7297
if err := ensureDir(filepath.Dir(target)); err != nil {
7398
return errors.Errorf("mkdir %s: %w", hdr.Name, err)
7499
}
@@ -85,6 +110,11 @@ func extractTarSeq(r io.Reader, dir string) error {
85110
}
86111

87112
case tar.TypeSymlink:
113+
if skipExisting {
114+
if _, err := os.Lstat(target); err == nil {
115+
continue
116+
}
117+
}
88118
if err := ensureDir(filepath.Dir(target)); err != nil {
89119
return errors.Errorf("mkdir for symlink %s: %w", hdr.Name, err)
90120
}

cmd/gradle-cache/main.go

Lines changed: 67 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
// This format is compatible with the bundled-cache-manager Ruby script.
66
//
77
// On restore, the tool walks the local git history (counting distinct-author
8-
// "blocks") to find the most recent S3 hit, downloads it, extracts it to a
9-
// temporary directory, and symlinks $GRADLE_USER_HOME/caches into place.
8+
// "blocks") to find the most recent S3 hit, downloads it, and extracts it
9+
// directly into the final destination directories (no staging dir, no symlinks).
1010
// A restore marker file is written immediately after extraction; save-delta uses
1111
// its mtime as the baseline to identify files created during the build.
1212
//
@@ -233,9 +233,27 @@ func (c *RestoreCmd) Run(ctx context.Context) error {
233233
dlStart := time.Now()
234234
slog.Info("downloading bundle", "commit", hitCommit[:min(8, len(hitCommit))])
235235

236-
tmpDir, err := os.MkdirTemp("", "gradle-cache-*")
236+
// Ensure GRADLE_USER_HOME exists before extracting into it.
237+
if err := os.MkdirAll(c.GradleUserHome, 0o750); err != nil {
238+
return errors.Wrap(err, "create gradle user home dir")
239+
}
240+
241+
// Resolve the project directory upfront; bundle entries are routed here for
242+
// configuration-cache and convention build dirs.
243+
projectDir, err := os.Getwd()
237244
if err != nil {
238-
return errors.Wrap(err, "create temp dir")
245+
return errors.Wrap(err, "get working directory")
246+
}
247+
248+
// Route tar entries to their final destinations directly:
249+
// ./caches/... → GRADLE_USER_HOME/caches/...
250+
// ./configuration-cache/... → <project>/.gradle/configuration-cache/...
251+
// everything else → <project>/... (buildSrc/build, plugins/*/build, …)
252+
// Existing files are left untouched (skipExisting=true) so a partial
253+
// pre-existing cache is merged rather than overwritten.
254+
rules := []extractRule{
255+
{prefix: "caches/", baseDir: c.GradleUserHome},
256+
{prefix: "configuration-cache/", baseDir: filepath.Join(projectDir, ".gradle")},
239257
}
240258

241259
body, err := store.get(ctx, hitCommit, c.CacheKey, hitSize)
@@ -247,7 +265,7 @@ func (c *RestoreCmd) Run(ctx context.Context) error {
247265
// countingBody records bytes consumed and timestamps when the S3 body is
248266
// exhausted so we can log download speed independently of extraction.
249267
cb := &countingBody{r: body, dlStart: dlStart}
250-
if err := extractTarZstd(ctx, cb, tmpDir); err != nil {
268+
if err := extractBundleZstd(ctx, cb, rules, projectDir); err != nil {
251269
return errors.Wrap(err, "extract bundle")
252270
}
253271

@@ -268,23 +286,6 @@ func (c *RestoreCmd) Run(ctx context.Context) error {
268286
slog.Info("restore pipeline complete",
269287
"total_duration", totalElapsed.Round(time.Millisecond))
270288

271-
// Symlink $GRADLE_USER_HOME/caches → tmpDir/caches.
272-
cachesTarget := filepath.Join(tmpDir, "caches")
273-
if _, err := os.Stat(cachesTarget); err != nil {
274-
return errors.Errorf("extracted bundle does not contain a caches/ directory: %w", err)
275-
}
276-
localCaches := filepath.Join(c.GradleUserHome, "caches")
277-
if err := os.MkdirAll(c.GradleUserHome, 0o750); err != nil {
278-
return errors.Wrap(err, "create gradle user home dir")
279-
}
280-
if err := os.RemoveAll(localCaches); err != nil {
281-
return errors.Wrap(err, "remove existing caches dir")
282-
}
283-
if err := os.Symlink(cachesTarget, localCaches); err != nil {
284-
return errors.Wrap(err, "symlink caches dir")
285-
}
286-
slog.Info("restored", "link", localCaches, "target", cachesTarget)
287-
288289
// Write a marker recording when the base restore finished.
289290
// save-delta compares file mtimes against this to identify files created
290291
// during the build. Our Go extractor never calls chtimes, so all restored
@@ -321,57 +322,59 @@ func (c *RestoreCmd) Run(ctx context.Context) error {
321322
}
322323
}
323324

324-
// Restore configuration-cache and convention build dirs from the current directory.
325-
projectDir, err := os.Getwd()
326-
if err != nil {
327-
return errors.Wrap(err, "get working directory")
328-
}
329-
if err := restoreProjectDirs(tmpDir, projectDir, c.IncludedBuilds); err != nil {
330-
return err
331-
}
332-
333325
slog.Debug("restore complete", "total_duration", time.Since(totalStart))
334326
return nil
335327
}
336328

337-
// restoreProjectDirs symlinks configuration-cache and included build output dirs
338-
// from tmpDir into projectDir, if present in the extracted bundle.
339-
// includedBuilds specifies which directories to check (see conventionBuildDirs).
340-
func restoreProjectDirs(tmpDir, projectDir string, includedBuilds []string) error {
341-
// configuration-cache: archived at ./configuration-cache/ relative to the bundle root
342-
// (not under .gradle/), matching the bundled-cache-manager.rb archive format.
343-
srcCC := filepath.Join(tmpDir, "configuration-cache")
344-
if _, err := os.Stat(srcCC); err == nil {
345-
dstCC := filepath.Join(projectDir, ".gradle", "configuration-cache")
346-
if err := os.MkdirAll(filepath.Dir(dstCC), 0o750); err != nil {
347-
return errors.Wrap(err, "create .gradle dir")
348-
}
349-
if err := os.RemoveAll(dstCC); err != nil {
350-
return errors.Wrap(err, "remove existing configuration-cache")
351-
}
352-
if err := os.Symlink(srcCC, dstCC); err != nil {
353-
return errors.Wrap(err, "symlink configuration-cache")
354-
}
355-
slog.Info("restored", "link", dstCC, "target", srcCC)
329+
// extractRule maps a tar entry path prefix to a destination base directory.
330+
// For an entry "prefix/rest/of/path", the file is placed at
331+
// filepath.Join(baseDir, "prefix/rest/of/path").
332+
type extractRule struct {
333+
prefix string // without leading "./"
334+
baseDir string
335+
}
336+
337+
// extractBundleZstd decompresses and extracts a base bundle, routing tar
338+
// entries to their final destinations based on rules. Any entry whose path does
339+
// not match a rule is placed under defaultDir. Existing files are not
340+
// overwritten (skipExisting semantics), so a partial pre-existing cache is
341+
// merged rather than replaced.
342+
func extractBundleZstd(ctx context.Context, r io.Reader, rules []extractRule, defaultDir string) error {
343+
zstdCmd := zstdDecompressCmd(ctx)
344+
zstdCmd.Stdin = r
345+
346+
var zstdStderr bytes.Buffer
347+
zstdCmd.Stderr = &zstdStderr
348+
349+
zstdOut, err := zstdCmd.StdoutPipe()
350+
if err != nil {
351+
return errors.Wrap(err, "zstd stdout pipe")
356352
}
357353

358-
// Included build output dirs present in the extracted bundle.
359-
for _, rel := range conventionBuildDirs(tmpDir, includedBuilds) {
360-
src := filepath.Join(tmpDir, rel)
361-
dst := filepath.Join(projectDir, rel)
362-
if err := os.MkdirAll(filepath.Dir(dst), 0o750); err != nil {
363-
return errors.Errorf("create parent of %s: %w", dst, err)
364-
}
365-
if err := os.RemoveAll(dst); err != nil {
366-
return errors.Errorf("remove existing %s: %w", dst, err)
367-
}
368-
if err := os.Symlink(src, dst); err != nil {
369-
return errors.Errorf("symlink %s: %w", rel, err)
354+
if err := zstdCmd.Start(); err != nil {
355+
return errors.Wrap(err, "start zstd")
356+
}
357+
358+
targetFn := func(name string) string {
359+
for _, rule := range rules {
360+
if strings.HasPrefix(name, rule.prefix) {
361+
return filepath.Join(rule.baseDir, name)
362+
}
370363
}
371-
slog.Info("restored", "link", dst, "target", src)
364+
return filepath.Join(defaultDir, name)
372365
}
373366

374-
return nil
367+
extractErr := extractTarPlatformRouted(zstdOut, targetFn, true)
368+
zstdErr := zstdCmd.Wait()
369+
370+
var errs []error
371+
if extractErr != nil {
372+
errs = append(errs, extractErr)
373+
}
374+
if zstdErr != nil {
375+
errs = append(errs, errors.Errorf("zstd: %w: %s", zstdErr, zstdStderr.String()))
376+
}
377+
return errors.Join(errs...)
375378
}
376379

377380
// RestoreDeltaCmd downloads and applies a branch-specific delta bundle on top of

0 commit comments

Comments
 (0)