Skip to content

Commit 46f870a

Browse files
committed
Add extraction benchmarks for cache bundle restore performance
Measure direct extraction and tmp+symlink restore paths with synthetic Gradle cache bundles so performance changes can be evaluated against realistic file distributions.
1 parent 8f1cc8f commit 46f870a

2 files changed

Lines changed: 250 additions & 8 deletions

File tree

cmd/gradle-cache/main.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -666,7 +666,7 @@ func (c *SaveDeltaCmd) Run(ctx context.Context) error {
666666

667667
// projectDirSources returns tarSource entries for project-specific dirs:
668668
// configuration-cache (from projectDir/.gradle/) and included build output dirs,
669-
// for any that exist on disk. The archive paths match bundled-cache-manager.rb.
669+
// for any that exist on disk.
670670
func projectDirSources(projectDir string, includedBuilds []string) []tarSource {
671671
var sources []tarSource
672672

@@ -740,7 +740,7 @@ type tarSource struct {
740740
}
741741

742742
// historyCommits runs git log from the given ref and returns commit SHAs within
743-
// maxBlocks distinct-author "blocks" (same algorithm as bundled-cache-manager.rb).
743+
// maxBlocks distinct-author "blocks".
744744
func historyCommits(ctx context.Context, gitDir, ref string, maxBlocks int) ([]string, error) {
745745
rawCount := maxBlocks * 10
746746
//nolint:gosec // ref is a user-supplied git ref, not a shell injection vector
@@ -861,10 +861,10 @@ func zstdCompressCmd(ctx context.Context) *exec.Cmd {
861861
}
862862

863863
// createTarZstd creates a zstd-compressed tar archive from the given sources and
864-
// writes it to w. Uses -h to dereference symlinks, matching bundled-cache-manager.rb.
864+
// writes it to w. Uses -h to dereference symlinks.
865865
// Multiple sources map to multiple -C baseDir path entries in the tar command,
866-
// which is how bundled-cache-manager.rb combines caches + configuration-cache +
867-
// convention build dirs into a single flat archive.
866+
// which is how we combine caches + configuration-cache + convention build dirs into a single flat
867+
// archive.
868868
func createTarZstd(ctx context.Context, w io.Writer, sources []tarSource) error {
869869
args := []string{"-chf", "-"}
870870
for _, src := range sources {

cmd/gradle-cache/main_test.go

Lines changed: 245 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
package main
33

44
import (
5+
archive_tar "archive/tar"
56
"bytes"
67
"context"
78
"fmt"
@@ -286,8 +287,7 @@ func TestExtractBundleRouting(t *testing.T) {
286287
// ─── Git history walk tests ──────────────────────────────────────────────────
287288

288289
// TestHistoryCommits creates a temporary git repository with a known commit
289-
// graph and verifies that the author-block counting logic matches the
290-
// bundled-cache-manager.rb algorithm.
290+
// graph and verifies the author-block counting logic.
291291
func TestHistoryCommits(t *testing.T) {
292292
if _, err := exec.LookPath("git"); err != nil {
293293
t.Skip("git not available")
@@ -468,7 +468,7 @@ func TestTarZstdRoundTrip(t *testing.T) {
468468
}
469469

470470
// TestTarZstdSymlinkDereference verifies that -h causes symlinked directories
471-
// to be archived as real content (matching bundled-cache-manager.rb's -h flag).
471+
// to be archived as real content.
472472
func TestTarZstdSymlinkDereference(t *testing.T) {
473473
if _, err := exec.LookPath("tar"); err != nil {
474474
t.Skip("tar not available")
@@ -750,6 +750,248 @@ func BenchmarkDeltaScanReal(b *testing.B) {
750750
b.ReportMetric(float64(totalFiles), "files/op")
751751
}
752752

753+
// ─── Extraction benchmark ─────────────────────────────────────────────────────
754+
755+
// BenchmarkExtract measures extractTarPlatformRouted throughput against a
756+
// synthetic tar archive that mimics the structure and file-size distribution of
757+
// a real Gradle cache bundle: many small metadata/index files (~1 KB) and a
758+
// smaller number of large jar files (~500 KB). Routing is exercised by
759+
// including both caches/ and configuration-cache/ entries.
760+
//
761+
// Run with:
762+
//
763+
// go test -bench=BenchmarkExtract -benchtime=3x ./cmd/gradle-cache/
764+
//
765+
// Output includes files/op and MB/op so you can derive ns/file and MB/s.
766+
func BenchmarkExtract(b *testing.B) {
767+
for _, tc := range []struct {
768+
name string
769+
smallFiles int // ~1 KB each (metadata, index, lock files)
770+
largeFiles int // ~512 KB each (jars)
771+
includeCC bool
772+
}{
773+
{"small_5k", 5_000, 0, false},
774+
{"mixed_5k_small_500_large", 5_000, 500, false},
775+
{"mixed_with_cc", 5_000, 500, true},
776+
} {
777+
tc := tc
778+
b.Run(tc.name, func(b *testing.B) {
779+
// Build the tar in memory once; each iteration re-extracts the same bytes.
780+
tarBuf := buildSyntheticTar(b, tc.smallFiles, tc.largeFiles, tc.includeCC)
781+
totalBytes := int64(tarBuf.Len())
782+
783+
destHome := b.TempDir()
784+
destProject := b.TempDir()
785+
786+
rules := []extractRule{
787+
{prefix: "caches/", baseDir: destHome},
788+
{prefix: "configuration-cache/", baseDir: filepath.Join(destProject, ".gradle")},
789+
}
790+
targetFn := func(name string) string {
791+
for _, rule := range rules {
792+
if strings.HasPrefix(name, rule.prefix) {
793+
return filepath.Join(rule.baseDir, name)
794+
}
795+
}
796+
return filepath.Join(destProject, name)
797+
}
798+
799+
nFiles := tc.smallFiles + tc.largeFiles
800+
if tc.includeCC {
801+
nFiles += 10
802+
}
803+
804+
b.SetBytes(totalBytes)
805+
b.ReportAllocs()
806+
b.ResetTimer()
807+
808+
for range b.N {
809+
// Each iteration extracts into a fresh directory so we're not
810+
// benchmarking the skipExisting fast-path.
811+
iterHome := b.TempDir()
812+
iterProject := b.TempDir()
813+
iterRules := []extractRule{
814+
{prefix: "caches/", baseDir: iterHome},
815+
{prefix: "configuration-cache/", baseDir: filepath.Join(iterProject, ".gradle")},
816+
}
817+
iterFn := func(name string) string {
818+
for _, rule := range iterRules {
819+
if strings.HasPrefix(name, rule.prefix) {
820+
return filepath.Join(rule.baseDir, name)
821+
}
822+
}
823+
return filepath.Join(iterProject, name)
824+
}
825+
_ = targetFn // suppress unused warning from outer scope
826+
if err := extractTarPlatformRouted(bytes.NewReader(tarBuf.Bytes()), iterFn, false); err != nil {
827+
b.Fatal(err)
828+
}
829+
}
830+
831+
b.ReportMetric(float64(nFiles), "files/op")
832+
b.ReportMetric(float64(totalBytes)/1e6, "MB/op")
833+
})
834+
}
835+
}
836+
837+
// buildSyntheticTar builds an uncompressed tar archive in memory with
838+
// smallFiles entries of ~1 KB and largeFiles entries of ~512 KB under caches/,
839+
// plus 10 configuration-cache entries if includeCC is true. The data is
840+
// deterministic (repeated 0x42 bytes) so it compresses well but still exercises
841+
// the full write path.
842+
func buildSyntheticTar(b *testing.B, smallFiles, largeFiles int, includeCC bool) *bytes.Buffer {
843+
b.Helper()
844+
845+
const smallSize = 1024
846+
const largeSize = 512 * 1024
847+
848+
smallData := bytes.Repeat([]byte{0x42}, smallSize)
849+
largeData := bytes.Repeat([]byte{0x55}, largeSize)
850+
851+
var buf bytes.Buffer
852+
tw := archive_tar.NewWriter(&buf)
853+
854+
writeEntry := func(name string, data []byte) {
855+
b.Helper()
856+
hdr := &archive_tar.Header{
857+
Typeflag: archive_tar.TypeReg,
858+
Name: name,
859+
Size: int64(len(data)),
860+
Mode: 0o644,
861+
}
862+
if err := tw.WriteHeader(hdr); err != nil {
863+
b.Fatalf("write tar header %s: %v", name, err)
864+
}
865+
if _, err := tw.Write(data); err != nil {
866+
b.Fatalf("write tar data %s: %v", name, err)
867+
}
868+
}
869+
870+
for i := range smallFiles {
871+
writeEntry(fmt.Sprintf("caches/8.14.3/group%d/artifact%d/f%d.index", i%50, i%20, i), smallData)
872+
}
873+
for i := range largeFiles {
874+
writeEntry(fmt.Sprintf("caches/8.14.3/jars-%d/group%d/artifact-%d.jar", i%10, i%30, i), largeData)
875+
}
876+
if includeCC {
877+
for i := range 10 {
878+
writeEntry(fmt.Sprintf("configuration-cache/entry-%d/work.bin", i), smallData)
879+
}
880+
}
881+
882+
if err := tw.Close(); err != nil {
883+
b.Fatalf("close tar: %v", err)
884+
}
885+
return &buf
886+
}
887+
888+
// BenchmarkExtractVsSymlink compares the current direct-extraction approach
889+
// against the old extract-to-tmpDir+symlink approach on the same synthetic
890+
// bundle. Both sub-benchmarks write identical bytes; the difference is whether
891+
// extraction targets the final directory directly or a sibling staging dir that
892+
// is then symlinked into place.
893+
//
894+
// Run with:
895+
//
896+
// go test -bench=BenchmarkExtractVsSymlink -benchtime=3x ./cmd/gradle-cache/
897+
func BenchmarkExtractVsSymlink(b *testing.B) {
898+
for _, tc := range []struct {
899+
name string
900+
smallFiles int
901+
largeFiles int
902+
includeCC bool
903+
}{
904+
{"small_5k", 5_000, 0, false},
905+
{"mixed_5k_small_500_large", 5_000, 500, false},
906+
{"mixed_with_cc", 5_000, 500, true},
907+
} {
908+
tc := tc
909+
tarBuf := buildSyntheticTar(b, tc.smallFiles, tc.largeFiles, tc.includeCC)
910+
totalBytes := int64(tarBuf.Len())
911+
nFiles := tc.smallFiles + tc.largeFiles
912+
if tc.includeCC {
913+
nFiles += 10
914+
}
915+
916+
// ── direct: extract straight to final destinations ──────────────────
917+
b.Run(tc.name+"/direct", func(b *testing.B) {
918+
b.SetBytes(totalBytes)
919+
b.ReportAllocs()
920+
b.ResetTimer()
921+
for range b.N {
922+
gradleHome := b.TempDir()
923+
projectDir := b.TempDir()
924+
rules := []extractRule{
925+
{prefix: "caches/", baseDir: gradleHome},
926+
{prefix: "configuration-cache/", baseDir: filepath.Join(projectDir, ".gradle")},
927+
}
928+
targetFn := func(name string) string {
929+
for _, rule := range rules {
930+
if strings.HasPrefix(name, rule.prefix) {
931+
return filepath.Join(rule.baseDir, name)
932+
}
933+
}
934+
return filepath.Join(projectDir, name)
935+
}
936+
if err := extractTarPlatformRouted(bytes.NewReader(tarBuf.Bytes()), targetFn, false); err != nil {
937+
b.Fatal(err)
938+
}
939+
}
940+
b.ReportMetric(float64(nFiles), "files/op")
941+
b.ReportMetric(float64(totalBytes)/1e6, "MB/op")
942+
})
943+
944+
// ── tmp+symlink: extract to sibling staging dir, then symlink ────────
945+
// Mirrors the old approach: MkdirTemp alongside gradleHome, extract
946+
// everything flat, then os.Symlink(tmpDir/caches, gradleHome/caches)
947+
// and os.Symlink(tmpDir/configuration-cache, project/.gradle/cc).
948+
b.Run(tc.name+"/tmp_symlink", func(b *testing.B) {
949+
b.SetBytes(totalBytes)
950+
b.ReportAllocs()
951+
b.ResetTimer()
952+
for range b.N {
953+
gradleHome := b.TempDir()
954+
projectDir := b.TempDir()
955+
956+
// Stage into a sibling of gradleHome (same filesystem → rename/symlink is instant).
957+
tmpDir, err := os.MkdirTemp(filepath.Dir(gradleHome), "gradle-cache-bench-*")
958+
if err != nil {
959+
b.Fatal(err)
960+
}
961+
962+
if err := extractTarPlatform(bytes.NewReader(tarBuf.Bytes()), tmpDir); err != nil {
963+
os.RemoveAll(tmpDir) //nolint:errcheck
964+
b.Fatal(err)
965+
}
966+
967+
// Symlink caches/ into gradleHome.
968+
if err := os.Symlink(filepath.Join(tmpDir, "caches"), filepath.Join(gradleHome, "caches")); err != nil {
969+
os.RemoveAll(tmpDir) //nolint:errcheck
970+
b.Fatal(err)
971+
}
972+
973+
// Symlink configuration-cache/ into project/.gradle/.
974+
if tc.includeCC {
975+
if err := os.MkdirAll(filepath.Join(projectDir, ".gradle"), 0o750); err != nil {
976+
os.RemoveAll(tmpDir) //nolint:errcheck
977+
b.Fatal(err)
978+
}
979+
if err := os.Symlink(
980+
filepath.Join(tmpDir, "configuration-cache"),
981+
filepath.Join(projectDir, ".gradle", "configuration-cache"),
982+
); err != nil {
983+
os.RemoveAll(tmpDir) //nolint:errcheck
984+
b.Fatal(err)
985+
}
986+
}
987+
// Leave tmpDir in place — symlinks point into it, same as old behaviour.
988+
}
989+
b.ReportMetric(float64(nFiles), "files/op")
990+
b.ReportMetric(float64(totalBytes)/1e6, "MB/op")
991+
})
992+
}
993+
}
994+
753995
// ─── Helpers ─────────────────────────────────────────────────────────────────
754996

755997
func must(t *testing.T, err error) {

0 commit comments

Comments
 (0)