|
2 | 2 | package main |
3 | 3 |
|
4 | 4 | import ( |
| 5 | + archive_tar "archive/tar" |
5 | 6 | "bytes" |
6 | 7 | "context" |
7 | 8 | "fmt" |
@@ -286,8 +287,7 @@ func TestExtractBundleRouting(t *testing.T) { |
286 | 287 | // ─── Git history walk tests ────────────────────────────────────────────────── |
287 | 288 |
|
288 | 289 | // TestHistoryCommits creates a temporary git repository with a known commit |
289 | | -// graph and verifies that the author-block counting logic matches the |
290 | | -// bundled-cache-manager.rb algorithm. |
| 290 | +// graph and verifies the author-block counting logic. |
291 | 291 | func TestHistoryCommits(t *testing.T) { |
292 | 292 | if _, err := exec.LookPath("git"); err != nil { |
293 | 293 | t.Skip("git not available") |
@@ -468,7 +468,7 @@ func TestTarZstdRoundTrip(t *testing.T) { |
468 | 468 | } |
469 | 469 |
|
470 | 470 | // TestTarZstdSymlinkDereference verifies that -h causes symlinked directories |
471 | | -// to be archived as real content (matching bundled-cache-manager.rb's -h flag). |
| 471 | +// to be archived as real content. |
472 | 472 | func TestTarZstdSymlinkDereference(t *testing.T) { |
473 | 473 | if _, err := exec.LookPath("tar"); err != nil { |
474 | 474 | t.Skip("tar not available") |
@@ -750,6 +750,248 @@ func BenchmarkDeltaScanReal(b *testing.B) { |
750 | 750 | b.ReportMetric(float64(totalFiles), "files/op") |
751 | 751 | } |
752 | 752 |
|
| 753 | +// ─── Extraction benchmark ───────────────────────────────────────────────────── |
| 754 | + |
| 755 | +// BenchmarkExtract measures extractTarPlatformRouted throughput against a |
| 756 | +// synthetic tar archive that mimics the structure and file-size distribution of |
| 757 | +// a real Gradle cache bundle: many small metadata/index files (~1 KB) and a |
| 758 | +// smaller number of large jar files (~500 KB). Routing is exercised by |
| 759 | +// including both caches/ and configuration-cache/ entries. |
| 760 | +// |
| 761 | +// Run with: |
| 762 | +// |
| 763 | +// go test -bench=BenchmarkExtract -benchtime=3x ./cmd/gradle-cache/ |
| 764 | +// |
| 765 | +// Output includes files/op and MB/op so you can derive ns/file and MB/s. |
| 766 | +func BenchmarkExtract(b *testing.B) { |
| 767 | + for _, tc := range []struct { |
| 768 | + name string |
| 769 | + smallFiles int // ~1 KB each (metadata, index, lock files) |
| 770 | + largeFiles int // ~512 KB each (jars) |
| 771 | + includeCC bool |
| 772 | + }{ |
| 773 | + {"small_5k", 5_000, 0, false}, |
| 774 | + {"mixed_5k_small_500_large", 5_000, 500, false}, |
| 775 | + {"mixed_with_cc", 5_000, 500, true}, |
| 776 | + } { |
| 777 | + tc := tc |
| 778 | + b.Run(tc.name, func(b *testing.B) { |
| 779 | + // Build the tar in memory once; each iteration re-extracts the same bytes. |
| 780 | + tarBuf := buildSyntheticTar(b, tc.smallFiles, tc.largeFiles, tc.includeCC) |
| 781 | + totalBytes := int64(tarBuf.Len()) |
| 782 | + |
| 783 | + destHome := b.TempDir() |
| 784 | + destProject := b.TempDir() |
| 785 | + |
| 786 | + rules := []extractRule{ |
| 787 | + {prefix: "caches/", baseDir: destHome}, |
| 788 | + {prefix: "configuration-cache/", baseDir: filepath.Join(destProject, ".gradle")}, |
| 789 | + } |
| 790 | + targetFn := func(name string) string { |
| 791 | + for _, rule := range rules { |
| 792 | + if strings.HasPrefix(name, rule.prefix) { |
| 793 | + return filepath.Join(rule.baseDir, name) |
| 794 | + } |
| 795 | + } |
| 796 | + return filepath.Join(destProject, name) |
| 797 | + } |
| 798 | + |
| 799 | + nFiles := tc.smallFiles + tc.largeFiles |
| 800 | + if tc.includeCC { |
| 801 | + nFiles += 10 |
| 802 | + } |
| 803 | + |
| 804 | + b.SetBytes(totalBytes) |
| 805 | + b.ReportAllocs() |
| 806 | + b.ResetTimer() |
| 807 | + |
| 808 | + for range b.N { |
| 809 | + // Each iteration extracts into a fresh directory so we're not |
| 810 | + // benchmarking the skipExisting fast-path. |
| 811 | + iterHome := b.TempDir() |
| 812 | + iterProject := b.TempDir() |
| 813 | + iterRules := []extractRule{ |
| 814 | + {prefix: "caches/", baseDir: iterHome}, |
| 815 | + {prefix: "configuration-cache/", baseDir: filepath.Join(iterProject, ".gradle")}, |
| 816 | + } |
| 817 | + iterFn := func(name string) string { |
| 818 | + for _, rule := range iterRules { |
| 819 | + if strings.HasPrefix(name, rule.prefix) { |
| 820 | + return filepath.Join(rule.baseDir, name) |
| 821 | + } |
| 822 | + } |
| 823 | + return filepath.Join(iterProject, name) |
| 824 | + } |
| 825 | + _ = targetFn // suppress unused warning from outer scope |
| 826 | + if err := extractTarPlatformRouted(bytes.NewReader(tarBuf.Bytes()), iterFn, false); err != nil { |
| 827 | + b.Fatal(err) |
| 828 | + } |
| 829 | + } |
| 830 | + |
| 831 | + b.ReportMetric(float64(nFiles), "files/op") |
| 832 | + b.ReportMetric(float64(totalBytes)/1e6, "MB/op") |
| 833 | + }) |
| 834 | + } |
| 835 | +} |
| 836 | + |
| 837 | +// buildSyntheticTar builds an uncompressed tar archive in memory with |
| 838 | +// smallFiles entries of ~1 KB and largeFiles entries of ~512 KB under caches/, |
| 839 | +// plus 10 configuration-cache entries if includeCC is true. The data is |
| 840 | +// deterministic (repeated 0x42 bytes) so it compresses well but still exercises |
| 841 | +// the full write path. |
| 842 | +func buildSyntheticTar(b *testing.B, smallFiles, largeFiles int, includeCC bool) *bytes.Buffer { |
| 843 | + b.Helper() |
| 844 | + |
| 845 | + const smallSize = 1024 |
| 846 | + const largeSize = 512 * 1024 |
| 847 | + |
| 848 | + smallData := bytes.Repeat([]byte{0x42}, smallSize) |
| 849 | + largeData := bytes.Repeat([]byte{0x55}, largeSize) |
| 850 | + |
| 851 | + var buf bytes.Buffer |
| 852 | + tw := archive_tar.NewWriter(&buf) |
| 853 | + |
| 854 | + writeEntry := func(name string, data []byte) { |
| 855 | + b.Helper() |
| 856 | + hdr := &archive_tar.Header{ |
| 857 | + Typeflag: archive_tar.TypeReg, |
| 858 | + Name: name, |
| 859 | + Size: int64(len(data)), |
| 860 | + Mode: 0o644, |
| 861 | + } |
| 862 | + if err := tw.WriteHeader(hdr); err != nil { |
| 863 | + b.Fatalf("write tar header %s: %v", name, err) |
| 864 | + } |
| 865 | + if _, err := tw.Write(data); err != nil { |
| 866 | + b.Fatalf("write tar data %s: %v", name, err) |
| 867 | + } |
| 868 | + } |
| 869 | + |
| 870 | + for i := range smallFiles { |
| 871 | + writeEntry(fmt.Sprintf("caches/8.14.3/group%d/artifact%d/f%d.index", i%50, i%20, i), smallData) |
| 872 | + } |
| 873 | + for i := range largeFiles { |
| 874 | + writeEntry(fmt.Sprintf("caches/8.14.3/jars-%d/group%d/artifact-%d.jar", i%10, i%30, i), largeData) |
| 875 | + } |
| 876 | + if includeCC { |
| 877 | + for i := range 10 { |
| 878 | + writeEntry(fmt.Sprintf("configuration-cache/entry-%d/work.bin", i), smallData) |
| 879 | + } |
| 880 | + } |
| 881 | + |
| 882 | + if err := tw.Close(); err != nil { |
| 883 | + b.Fatalf("close tar: %v", err) |
| 884 | + } |
| 885 | + return &buf |
| 886 | +} |
| 887 | + |
| 888 | +// BenchmarkExtractVsSymlink compares the current direct-extraction approach |
| 889 | +// against the old extract-to-tmpDir+symlink approach on the same synthetic |
| 890 | +// bundle. Both sub-benchmarks write identical bytes; the difference is whether |
| 891 | +// extraction targets the final directory directly or a sibling staging dir that |
| 892 | +// is then symlinked into place. |
| 893 | +// |
| 894 | +// Run with: |
| 895 | +// |
| 896 | +// go test -bench=BenchmarkExtractVsSymlink -benchtime=3x ./cmd/gradle-cache/ |
| 897 | +func BenchmarkExtractVsSymlink(b *testing.B) { |
| 898 | + for _, tc := range []struct { |
| 899 | + name string |
| 900 | + smallFiles int |
| 901 | + largeFiles int |
| 902 | + includeCC bool |
| 903 | + }{ |
| 904 | + {"small_5k", 5_000, 0, false}, |
| 905 | + {"mixed_5k_small_500_large", 5_000, 500, false}, |
| 906 | + {"mixed_with_cc", 5_000, 500, true}, |
| 907 | + } { |
| 908 | + tc := tc |
| 909 | + tarBuf := buildSyntheticTar(b, tc.smallFiles, tc.largeFiles, tc.includeCC) |
| 910 | + totalBytes := int64(tarBuf.Len()) |
| 911 | + nFiles := tc.smallFiles + tc.largeFiles |
| 912 | + if tc.includeCC { |
| 913 | + nFiles += 10 |
| 914 | + } |
| 915 | + |
| 916 | + // ── direct: extract straight to final destinations ────────────────── |
| 917 | + b.Run(tc.name+"/direct", func(b *testing.B) { |
| 918 | + b.SetBytes(totalBytes) |
| 919 | + b.ReportAllocs() |
| 920 | + b.ResetTimer() |
| 921 | + for range b.N { |
| 922 | + gradleHome := b.TempDir() |
| 923 | + projectDir := b.TempDir() |
| 924 | + rules := []extractRule{ |
| 925 | + {prefix: "caches/", baseDir: gradleHome}, |
| 926 | + {prefix: "configuration-cache/", baseDir: filepath.Join(projectDir, ".gradle")}, |
| 927 | + } |
| 928 | + targetFn := func(name string) string { |
| 929 | + for _, rule := range rules { |
| 930 | + if strings.HasPrefix(name, rule.prefix) { |
| 931 | + return filepath.Join(rule.baseDir, name) |
| 932 | + } |
| 933 | + } |
| 934 | + return filepath.Join(projectDir, name) |
| 935 | + } |
| 936 | + if err := extractTarPlatformRouted(bytes.NewReader(tarBuf.Bytes()), targetFn, false); err != nil { |
| 937 | + b.Fatal(err) |
| 938 | + } |
| 939 | + } |
| 940 | + b.ReportMetric(float64(nFiles), "files/op") |
| 941 | + b.ReportMetric(float64(totalBytes)/1e6, "MB/op") |
| 942 | + }) |
| 943 | + |
| 944 | + // ── tmp+symlink: extract to sibling staging dir, then symlink ──────── |
| 945 | + // Mirrors the old approach: MkdirTemp alongside gradleHome, extract |
| 946 | + // everything flat, then os.Symlink(tmpDir/caches, gradleHome/caches) |
| 947 | + // and os.Symlink(tmpDir/configuration-cache, project/.gradle/cc). |
| 948 | + b.Run(tc.name+"/tmp_symlink", func(b *testing.B) { |
| 949 | + b.SetBytes(totalBytes) |
| 950 | + b.ReportAllocs() |
| 951 | + b.ResetTimer() |
| 952 | + for range b.N { |
| 953 | + gradleHome := b.TempDir() |
| 954 | + projectDir := b.TempDir() |
| 955 | + |
| 956 | + // Stage into a sibling of gradleHome (same filesystem → rename/symlink is instant). |
| 957 | + tmpDir, err := os.MkdirTemp(filepath.Dir(gradleHome), "gradle-cache-bench-*") |
| 958 | + if err != nil { |
| 959 | + b.Fatal(err) |
| 960 | + } |
| 961 | + |
| 962 | + if err := extractTarPlatform(bytes.NewReader(tarBuf.Bytes()), tmpDir); err != nil { |
| 963 | + os.RemoveAll(tmpDir) //nolint:errcheck |
| 964 | + b.Fatal(err) |
| 965 | + } |
| 966 | + |
| 967 | + // Symlink caches/ into gradleHome. |
| 968 | + if err := os.Symlink(filepath.Join(tmpDir, "caches"), filepath.Join(gradleHome, "caches")); err != nil { |
| 969 | + os.RemoveAll(tmpDir) //nolint:errcheck |
| 970 | + b.Fatal(err) |
| 971 | + } |
| 972 | + |
| 973 | + // Symlink configuration-cache/ into project/.gradle/. |
| 974 | + if tc.includeCC { |
| 975 | + if err := os.MkdirAll(filepath.Join(projectDir, ".gradle"), 0o750); err != nil { |
| 976 | + os.RemoveAll(tmpDir) //nolint:errcheck |
| 977 | + b.Fatal(err) |
| 978 | + } |
| 979 | + if err := os.Symlink( |
| 980 | + filepath.Join(tmpDir, "configuration-cache"), |
| 981 | + filepath.Join(projectDir, ".gradle", "configuration-cache"), |
| 982 | + ); err != nil { |
| 983 | + os.RemoveAll(tmpDir) //nolint:errcheck |
| 984 | + b.Fatal(err) |
| 985 | + } |
| 986 | + } |
| 987 | + // Leave tmpDir in place — symlinks point into it, same as old behaviour. |
| 988 | + } |
| 989 | + b.ReportMetric(float64(nFiles), "files/op") |
| 990 | + b.ReportMetric(float64(totalBytes)/1e6, "MB/op") |
| 991 | + }) |
| 992 | + } |
| 993 | +} |
| 994 | + |
753 | 995 | // ─── Helpers ───────────────────────────────────────────────────────────────── |
754 | 996 |
|
755 | 997 | func must(t *testing.T, err error) { |
|
0 commit comments