-
Notifications
You must be signed in to change notification settings - Fork 39
183 lines (152 loc) · 7.97 KB
/
warmMavenCache.yml
File metadata and controls
183 lines (152 loc) · 7.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
name: Warm Maven Dependency Cache

# Pre-downloads every Maven dependency through JFrog Artifactory and stores
# the resulting local repository in the GitHub Actions cache. Forked PRs
# cannot authenticate to JFrog, so they restore this cache and build without
# credentials.
#
# Triggers:
#   - push to main that touches any pom.xml (refresh after dependency updates)
#   - daily schedule (intended to keep the cache inside the 7-day eviction window)
#   - manual dispatch, optionally with a PR number, to warm the cache from that
#     PR's pom.xml files instead of main
on:
  push:
    branches:
      - main
    paths:
      - '**/pom.xml'
  schedule:
    # Daily at 06:00 UTC
    - cron: '0 6 * * *'
  workflow_dispatch:
    inputs:
      pr_number:
        description: >-
          PR number to warm cache for (reads pom.xml from the PR branch).
          Leave empty to warm from main.
        required: false
        type: string
permissions:
  # Read-only access to repository contents.
  contents: read
  # Lets the job request a GitHub OIDC ID token (exchanged for a JFrog token).
  id-token: write
  # Needed to read PR metadata for fork checkout.
  pull-requests: read
jobs:
  warm-cache:
    # Run on Linux only. Maven repository contents (JARs/POMs) are
    # platform-independent. Windows forked PRs restore this same cache via the
    # restore-keys prefix match.
    # Note: Windows runners in databricks-protected-runner-group lack bash,
    # which is required for the OIDC token exchange scripts.
    runs-on:
      group: databricks-protected-runner-group
      labels: linux-ubuntu-latest
    steps:
      - name: Set up JDK
        uses: actions/setup-java@c1e323688fd81a25caa38c78aa6df2d33d3e20d9 # v4
        with:
          java-version: '21'
          distribution: 'adopt'

      # If a PR number is provided, check out only the pom.xml files from the
      # PR head (no source code from the fork is written to the workspace).
      #
      # NOTE(review): Maven runs the plugins declared in these pom.xml files,
      # and the later steps run with JFROG_ACCESS_TOKEN in the environment.
      # Only dispatch this for PRs whose pom.xml changes have been reviewed.
      - name: Checkout PR pom.xml files (sparse)
        if: inputs.pr_number != ''
        shell: bash
        # Context values are passed through the environment rather than being
        # interpolated into the script text, so they can never be parsed as
        # shell syntax.
        env:
          GH_TOKEN: ${{ github.token }}
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ inputs.pr_number }}
        run: |
          set -euo pipefail

          # The input is free-form text; accept only a plain positive integer.
          if ! [[ "${PR_NUMBER}" =~ ^[1-9][0-9]*$ ]]; then
            echo "FAIL: pr_number must be a positive integer, got '${PR_NUMBER}'"
            exit 1
          fi

          # Fetch PR metadata. --fail turns HTTP errors (e.g. 404 for an
          # unknown PR) into a non-zero exit instead of an error JSON body.
          PR_DATA=$(curl -sSL --fail \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer ${GH_TOKEN}" \
            "https://api.github.com/repos/${REPO}/pulls/${PR_NUMBER}")

          # '// empty' maps a missing/null field (e.g. the fork was deleted)
          # to an empty string rather than the literal text "null".
          FORK_REPO=$(jq -r '.head.repo.full_name // empty' <<< "${PR_DATA}")
          FORK_REF=$(jq -r '.head.ref // empty' <<< "${PR_DATA}")
          if [ -z "${FORK_REPO}" ] || [ -z "${FORK_REF}" ]; then
            echo "FAIL: Could not determine head repository/ref for PR #${PR_NUMBER}"
            exit 1
          fi

          echo "Warming cache for PR #${PR_NUMBER} from ${FORK_REPO}@${FORK_REF}"

          # Sparse checkout: only pom.xml files (no source code from fork)
          git init .
          git remote add fork "https://github.com/${FORK_REPO}.git"
          git config core.sparseCheckout true
          echo "**/pom.xml" > .git/info/sparse-checkout
          echo "pom.xml" >> .git/info/sparse-checkout
          git fetch --depth=1 fork "${FORK_REF}"
          git checkout FETCH_HEAD

      - name: Checkout main branch
        if: inputs.pr_number == ''
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

      # Exchanges the job's GitHub OIDC ID token for a JFrog access token and
      # exports it to later steps as JFROG_ACCESS_TOKEN.
      - name: Get JFrog OIDC token
        shell: bash
        run: |
          set -euo pipefail

          # Get GitHub OIDC ID token
          ID_TOKEN=$(curl -sSL --fail \
            -H "User-Agent: actions/oidc-client" \
            -H "Authorization: Bearer ${ACTIONS_ID_TOKEN_REQUEST_TOKEN}" \
            "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=jfrog-github" | jq -r '.value // empty')
          if [ -z "${ID_TOKEN}" ]; then
            echo "FAIL: Could not obtain GitHub OIDC ID token"
            exit 1
          fi
          # Mask only after validation so a failure never registers an empty
          # or placeholder value as a secret.
          echo "::add-mask::${ID_TOKEN}"

          # Build the request body with jq so the token is always JSON-escaped.
          REQUEST_BODY=$(jq -n --arg subject_token "${ID_TOKEN}" '{
            grant_type: "urn:ietf:params:oauth:grant-type:token-exchange",
            subject_token_type: "urn:ietf:params:oauth:token-type:id_token",
            subject_token: $subject_token,
            provider_name: "github-actions"
          }')

          # Exchange for JFrog access token
          ACCESS_TOKEN=$(curl -sSL --fail -X POST \
            -H "Content-Type: application/json" \
            "https://databricks.jfrog.io/access/api/v1/oidc/token" \
            -d "${REQUEST_BODY}" | jq -r '.access_token // empty')
          if [ -z "${ACCESS_TOKEN}" ]; then
            echo "FAIL: Could not extract JFrog access token"
            exit 1
          fi
          echo "::add-mask::${ACCESS_TOKEN}"

          echo "JFROG_ACCESS_TOKEN=${ACCESS_TOKEN}" >> "$GITHUB_ENV"
          echo "JFrog OIDC token obtained successfully"

      # Writes ~/.m2/settings.xml so that every repository is mirrored through
      # JFrog and authenticated with the token obtained in the previous step.
      - name: Configure Maven with JFrog credentials
        shell: bash
        run: |
          set -euo pipefail
          mkdir -p ~/.m2
          # The heredoc delimiter is intentionally unquoted so that
          # ${JFROG_ACCESS_TOKEN} is expanded by the shell.
          cat > ~/.m2/settings.xml << EOF
          <settings>
            <mirrors>
              <mirror>
                <id>jfrog-central</id>
                <mirrorOf>*</mirrorOf>
                <url>https://databricks.jfrog.io/artifactory/db-maven/</url>
              </mirror>
            </mirrors>
            <servers>
              <server>
                <id>jfrog-central</id>
                <username>gha-service-account</username>
                <password>${JFROG_ACCESS_TOKEN}</password>
              </server>
            </servers>
          </settings>
          EOF

      - name: Resolve all dependencies via JFrog
        shell: bash
        run: |
          set -euo pipefail
          # Run the EXACT same Maven commands as the PR CI workflows.
          # This is the only reliable way to ensure every plugin, provider,
          # and transitive dependency is resolved and cached. Each command
          # mirrors a real CI step from prCheck.yml, prIntegrationTests.yml,
          # or coverageReport.yml.
          #
          # Every command except the install in 2/8 ends in '|| true': only
          # the downloads matter here, so a failing check or test must not
          # abort the warm-up. A failure of the install itself fails the step.

          echo "=== 1/8: spotless:check (formatting-check job) ==="
          mvn -B --errors spotless:check || true

          echo "=== 2/8: install all modules (packaging-tests job) ==="
          mvn -B -pl jdbc-core,assembly-uber,assembly-thin clean install -DskipTests -Dmaven.javadoc.skip=true -Dmaven.source.skip=true -Ddependency-check.skip=true

          echo "=== 3/8: Arrow Patch Tests (unit-tests job, JDK 17+) ==="
          mvn -B -Pjdk21-NioNotOpen -pl jdbc-core test -Dgroups='Jvm17PlusAndArrowToNioReflectionDisabled' -Ddependency-check.skip=true || true

          echo "=== 4/8: Arrow Allocator Tests (unit-tests job, JDK 17+) ==="
          mvn -B -Pjdk21-NioNotOpen -pl jdbc-core test -Dgroups='Jvm17PlusAndArrowToNioReflectionDisabled' -Dtest="ArrowBufferAllocatorNettyManagerTest,ArrowBufferAllocatorUnsafeManagerTest,ArrowBufferAllocatorUnknownManagerTest" -DforkCount=1 -DreuseForks=false -Ddependency-check.skip=true || true

          echo "=== 5/8: Arrow Memory Tests (unit-tests job) ==="
          mvn -B -Plow-memory -pl jdbc-core test -Dtest='DatabricksArrowPatchMemoryUsageTest' -Ddependency-check.skip=true || true

          echo "=== 6/8: Unit Tests with jacoco (unit-tests job) ==="
          mvn -B -pl jdbc-core clean test -Dtest="DatabricksParameterMetaDataTest#testInitialization" -Dgroups='!Jvm17PlusAndArrowToNioReflectionDisabled' jacoco:report -Ddependency-check.skip=true || true

          echo "=== 7/8: Integration test compile (prIntegrationTests job) ==="
          mvn -B -pl jdbc-core compile test-compile -Ddependency-check.skip=true || true

          echo "=== 8/8: Resolve all declared plugins ==="
          mvn -B -pl jdbc-core dependency:resolve-plugins -Ddependency-check.skip=true || true

          echo "Dependency resolution complete"

      - name: Normalize _remote.repositories before saving cache
        shell: bash
        run: |
          set -euo pipefail
          # Replace 'jfrog-central' with 'central' in _remote.repositories files.
          # These files track which repo ID each artifact was downloaded from. The
          # cache warmer downloads from 'jfrog-central' (the JFrog mirror), but
          # Maven's offline mode expects artifacts to be associated with 'central'
          # (the default Maven Central repo ID). Without this, offline mode refuses
          # cached artifacts with "has not been downloaded from it before".
          REPO_DIR="${HOME}/.m2/repository"
          COUNT=$(find "${REPO_DIR}" -type f -name '_remote.repositories' -print | wc -l)
          # '{} +' batches many files into each sed invocation instead of
          # spawning one process per marker file.
          find "${REPO_DIR}" -type f -name '_remote.repositories' -exec sed -i 's/jfrog-central/central/g' {} +
          echo "Normalized ${COUNT} _remote.repositories markers (jfrog-central -> central)"

      # The key depends only on the pom.xml contents present in the workspace.
      # NOTE(review): actions/cache/save does not overwrite an existing key, so
      # a scheduled run with unchanged pom.xml files presumably saves nothing;
      # confirm that the daily trigger really keeps the cache from being evicted.
      - name: Save Maven dependency cache
        uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
        with:
          path: ~/.m2/repository
          key: maven-deps-${{ hashFiles('**/pom.xml') }}