Skip to content

Commit 7d17723

Browse files
authored
Warm page cache before save to eliminate cold-NVMe IOPS bottleneck (#4)
Parallel readers fault all cache files into the OS page cache before tar starts, so tar reads at memory speed (~1300 MB/s) instead of cold NVMe IOPS-limited ~80 MB/s. 2-5x archive speedup depending on instance generation.
1 parent 03433f5 commit 7d17723

1 file changed

Lines changed: 46 additions & 0 deletions

File tree

gradlecache/save.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"context"
88
"fmt"
99
"io"
10+
"io/fs"
1011
"log/slog"
1112
"os"
1213
"os/exec"
@@ -124,6 +125,7 @@ type SaveConfig struct {
124125
GitDir string
125126
GradleUserHome string
126127
IncludedBuilds []string
128+
SkipWarm bool // skip page cache warming (for benchmarking cold baseline)
127129
Metrics MetricsClient
128130
Logger *slog.Logger
129131
}
@@ -194,6 +196,16 @@ func Save(ctx context.Context, cfg SaveConfig) error {
194196
pr, pw := io.Pipe()
195197

196198
log.Info("saving bundle", "commit", cfg.Commit[:min(8, len(cfg.Commit))], "cache-key", cfg.CacheKey)
199+
200+
if !cfg.SkipWarm {
201+
log.Debug("warming page cache")
202+
warmStart := time.Now()
203+
warmPageCache(sources)
204+
log.Debug("page cache warm", "duration", time.Since(warmStart).Round(time.Millisecond))
205+
} else {
206+
log.Debug("skipping page cache warm (SkipWarm=true)")
207+
}
208+
197209
saveStart := time.Now()
198210

199211
// Wrap the archive→upload boundary to measure upload wait time.
@@ -509,6 +521,40 @@ func matchesAny(name string, patterns []string) bool {
509521
return false
510522
}
511523

524+
// warmPageCache reads every regular file under each TarSource in parallel,
525+
// faulting pages into the OS page cache before tar reads them sequentially.
526+
// On cold NVMe storage with many small files (e.g. 200K Gradle cache entries),
527+
// tar is limited to ~80 MB/s by per-file IOPS overhead. Warming the cache with
528+
// parallel readers saturates IOPS up front so that tar subsequently reads at
529+
// memory speed (~1300 MB/s).
530+
func warmPageCache(sources []TarSource) {
531+
concurrency := min(runtime.GOMAXPROCS(0)*2, 32)
532+
sem := make(chan struct{}, concurrency)
533+
var wg sync.WaitGroup
534+
535+
for _, src := range sources {
536+
root := filepath.Join(src.BaseDir, src.Path)
537+
_ = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
538+
if err != nil || !d.Type().IsRegular() {
539+
return nil
540+
}
541+
sem <- struct{}{}
542+
wg.Add(1)
543+
go func() {
544+
defer func() { <-sem; wg.Done() }()
545+
f, err := os.Open(path)
546+
if err != nil {
547+
return
548+
}
549+
_, _ = io.Copy(io.Discard, f)
550+
_ = f.Close()
551+
}()
552+
return nil
553+
})
554+
}
555+
wg.Wait()
556+
}
557+
512558
// CreateTarZstd creates a zstd-compressed tar archive from the given sources.
513559
// If pzstd is available it is used to produce a multi-frame archive that can
514560
// be decompressed in parallel on restore. Otherwise klauspost is used.

0 commit comments

Comments
 (0)