# Source: .github/workflows/warmMavenCache.yml (databricks/databricks-jdbc @ ec803b82d1e5990c55b2d601991edd469169ffee)
# Display name of this workflow.
name: Warm Maven Dependency Cache

# This workflow pre-downloads all Maven dependencies via JFrog Artifactory
# and saves them to the GitHub Actions cache. Forked PRs (which cannot
# authenticate to JFrog) restore this cache to build without credentials.
#
# Triggers:
#   - push to main when pom.xml changes (keeps cache fresh after dep updates)
#   - daily schedule (prevents 7-day cache eviction)
#   - manual dispatch (with optional PR number to warm cache for a fork's pom.xml)

on:
  # Re-warm whenever any pom.xml changes on main.
  push:
    branches:
      - main
    paths:
      - '**/pom.xml'
  # Daily at 06:00 UTC.
  schedule:
    - cron: '0 6 * * *'
  # Manual run; the optional input selects a PR whose pom.xml files are used.
  workflow_dispatch:
    inputs:
      pr_number:
        description: >-
          PR number to warm cache for (reads pom.xml from the PR branch).
          Leave empty to warm from main.
        required: false
        type: string

permissions:
  # Repository contents are only read (checkout).
  contents: read
  # Needed to read PR metadata for fork checkout.
  pull-requests: read
  # Lets the job request a GitHub OIDC ID token (exchanged for a JFrog token below).
  id-token: write

jobs:
  warm-cache:
    # Linux is the only platform this job runs on: the Maven repository contents
    # (JARs/POMs) are platform-independent, and Windows forked PRs restore this
    # same cache via the restore-keys prefix match.
    # Note: Windows runners in databricks-protected-runner-group lack bash, which
    # is required for the OIDC token exchange scripts.
    runs-on:
      labels: linux-ubuntu-latest
      group: databricks-protected-runner-group

    steps:
      # Installs the JDK used by every Maven invocation in this job.
      # NOTE(review): the setup-java documentation describes 'adopt' as superseded
      # by 'temurin' — confirm before switching, since other workflows may rely on
      # the same distribution.
      - name: Set up JDK
        uses: actions/setup-java@c1e323688fd81a25caa38c78aa6df2d33d3e20d9 # v4
        with:
          distribution: 'adopt'
          # Quoted so the version is always treated as a string, never a number.
          java-version: '21'

      # If a PR number is provided, check out only the pom.xml files from the PR's
      # head repository (sparse checkout), so no fork source code lands on the runner.
      # NOTE(review): a fork-controlled pom.xml can still declare plugins that run
      # during the Maven steps of this job, which hold JFrog credentials — confirm
      # that dispatching for a PR is limited to reviewed pom.xml changes.
      - name: Checkout PR pom.xml files (sparse)
        if: inputs.pr_number != ''
        shell: bash
        env:
          # Values are handed to the script through the environment instead of being
          # expanded into the script text, so they can never be parsed as shell code.
          API_TOKEN: ${{ github.token }}
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ inputs.pr_number }}
        run: |
          set -euo pipefail

          # Reject anything that is not a plain number before it is used in a URL.
          if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then
            echo "FAIL: pr_number must be numeric, got '${PR_NUMBER}'"
            exit 1
          fi

          # Fetch PR metadata. --fail turns HTTP errors (e.g. 404 for an unknown PR)
          # into a non-zero exit instead of feeding an error body to jq.
          PR_DATA=$(curl -sSL --fail \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer ${API_TOKEN}" \
            "https://api.github.com/repos/${REPO}/pulls/${PR_NUMBER}")

          # '// empty' maps a JSON null (e.g. the head repository was deleted) to an
          # empty string rather than the literal text "null".
          FORK_REPO=$(jq -r '.head.repo.full_name // empty' <<< "$PR_DATA")
          FORK_REF=$(jq -r '.head.ref // empty' <<< "$PR_DATA")

          if [ -z "$FORK_REPO" ] || [ -z "$FORK_REF" ]; then
            echo "FAIL: Could not determine head repository/ref for PR #${PR_NUMBER}"
            exit 1
          fi

          echo "Warming cache for PR #${PR_NUMBER} from ${FORK_REPO}@${FORK_REF}"

          # Sparse checkout: only pom.xml files (no source code from fork)
          git init .
          git remote add fork "https://github.com/${FORK_REPO}.git"
          git config core.sparseCheckout true
          echo "**/pom.xml" > .git/info/sparse-checkout
          echo "pom.xml" >> .git/info/sparse-checkout
          git fetch --depth=1 fork "${FORK_REF}"
          git checkout FETCH_HEAD

      # No PR number given (push, schedule, or plain manual run): use the regular
      # checkout action for the ref that triggered the workflow.
      - name: Checkout main branch
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
        if: inputs.pr_number == ''

      # Requests a GitHub OIDC ID token (audience 'jfrog-github'), exchanges it at
      # the JFrog token endpoint, and exports the resulting access token to later
      # steps as JFROG_ACCESS_TOKEN via GITHUB_ENV. Both tokens are masked in logs.
      - name: Get JFrog OIDC token
        shell: bash
        run: |
          set -euo pipefail

          # Get GitHub OIDC ID token. --fail aborts on HTTP errors instead of piping
          # an error body into jq; '// empty' yields "" rather than "null" when the
          # field is missing.
          ID_TOKEN=$(curl -sSL --fail \
            -H "User-Agent: actions/oidc-client" \
            -H "Authorization: Bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \
            "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=jfrog-github" | jq -r '.value // empty')

          if [ -z "$ID_TOKEN" ]; then
            echo "FAIL: Could not obtain GitHub OIDC ID token"
            exit 1
          fi
          # Mask only after validation so a placeholder value is never registered as a secret.
          echo "::add-mask::${ID_TOKEN}"

          # Build the token-exchange request with jq so the token is JSON-escaped
          # rather than spliced into a hand-written JSON string.
          REQUEST_BODY=$(jq -cn --arg token "$ID_TOKEN" '{
            grant_type: "urn:ietf:params:oauth:grant-type:token-exchange",
            subject_token_type: "urn:ietf:params:oauth:token-type:id_token",
            subject_token: $token,
            provider_name: "github-actions"
          }')

          # Exchange for JFrog access token
          ACCESS_TOKEN=$(curl -sSL --fail -X POST -H "Content-Type: application/json" \
            "https://databricks.jfrog.io/access/api/v1/oidc/token" \
            -d "$REQUEST_BODY" | jq -r '.access_token // empty')

          if [ -z "$ACCESS_TOKEN" ]; then
            echo "FAIL: Could not extract JFrog access token"
            exit 1
          fi
          echo "::add-mask::${ACCESS_TOKEN}"

          echo "JFROG_ACCESS_TOKEN=${ACCESS_TOKEN}" >> "$GITHUB_ENV"
          echo "JFrog OIDC token obtained successfully"

      # Generates ~/.m2/settings.xml: all repositories are mirrored through the
      # 'jfrog-central' Artifactory mirror, and the matching <server> entry carries
      # the JFROG_ACCESS_TOKEN taken from the job environment.
      - name: Configure Maven with JFrog credentials
        shell: bash
        run: |
          set -euo pipefail

          M2_DIR="${HOME}/.m2"
          mkdir -p "${M2_DIR}"

          # The heredoc delimiter is unquoted on purpose: the shell substitutes
          # ${JFROG_ACCESS_TOKEN} while writing the file.
          cat << EOF > "${M2_DIR}/settings.xml"
          <settings>
            <mirrors>
              <mirror>
                <id>jfrog-central</id>
                <mirrorOf>*</mirrorOf>
                <url>https://databricks.jfrog.io/artifactory/db-maven/</url>
              </mirror>
            </mirrors>
            <servers>
              <server>
                <id>jfrog-central</id>
                <username>gha-service-account</username>
                <password>${JFROG_ACCESS_TOKEN}</password>
              </server>
            </servers>
          </settings>
          EOF

      # Populates ~/.m2/repository by running the Maven commands listed below.
      # Only the install (2/8) is allowed to fail the job; every other command is
      # best-effort, but its failure is reported as a workflow warning.
      - name: Resolve all dependencies via JFrog
        shell: bash
        run: |
          set -euo pipefail

          # Run the EXACT same Maven commands as the PR CI workflows.
          # This is the only reliable way to ensure every plugin, provider,
          # and transitive dependency is resolved and cached. Each command
          # mirrors a real CI step from prCheck.yml, prIntegrationTests.yml,
          # or coverageReport.yml.

          # best_effort LABEL MVN_ARGS...
          # Runs 'mvn -B MVN_ARGS...'. A failure does not abort cache warming, but it
          # is surfaced as a warning annotation instead of being silently discarded,
          # so an incompletely warmed cache is visible in the run summary.
          best_effort() {
            local label="$1"
            shift
            if ! mvn -B "$@"; then
              echo "::warning::${label} failed; the saved Maven cache may be incomplete"
            fi
          }

          echo "=== 1/8: spotless:check (formatting-check job) ==="
          best_effort "spotless:check" --errors spotless:check

          # The install is the one command that must succeed (no best-effort wrapper).
          echo "=== 2/8: install all modules (packaging-tests job) ==="
          mvn -B -pl jdbc-core,assembly-uber,assembly-thin clean install -DskipTests -Dmaven.javadoc.skip=true -Dmaven.source.skip=true -Ddependency-check.skip=true

          echo "=== 3/8: Arrow Patch Tests (unit-tests job, JDK 17+) ==="
          best_effort "Arrow Patch Tests" -Pjdk21-NioNotOpen -pl jdbc-core test -Dgroups='Jvm17PlusAndArrowToNioReflectionDisabled' -Ddependency-check.skip=true

          echo "=== 4/8: Arrow Allocator Tests (unit-tests job, JDK 17+) ==="
          best_effort "Arrow Allocator Tests" -Pjdk21-NioNotOpen -pl jdbc-core test -Dgroups='Jvm17PlusAndArrowToNioReflectionDisabled' -Dtest="ArrowBufferAllocatorNettyManagerTest,ArrowBufferAllocatorUnsafeManagerTest,ArrowBufferAllocatorUnknownManagerTest" -DforkCount=1 -DreuseForks=false -Ddependency-check.skip=true

          echo "=== 5/8: Arrow Memory Tests (unit-tests job) ==="
          best_effort "Arrow Memory Tests" -Plow-memory -pl jdbc-core test -Dtest='DatabricksArrowPatchMemoryUsageTest' -Ddependency-check.skip=true

          echo "=== 6/8: Unit Tests with jacoco (unit-tests job) ==="
          best_effort "Unit Tests with jacoco" -pl jdbc-core clean test -Dtest="DatabricksParameterMetaDataTest#testInitialization" -Dgroups='!Jvm17PlusAndArrowToNioReflectionDisabled' jacoco:report -Ddependency-check.skip=true

          echo "=== 7/8: Integration test compile (prIntegrationTests job) ==="
          best_effort "Integration test compile" -pl jdbc-core compile test-compile -Ddependency-check.skip=true

          echo "=== 8/8: Resolve all declared plugins ==="
          best_effort "Resolve all declared plugins" -pl jdbc-core dependency:resolve-plugins -Ddependency-check.skip=true

          echo "Dependency resolution complete"

      # Rewrites the repository ID recorded in every _remote.repositories marker
      # under ~/.m2/repository from 'jfrog-central' to 'central'.
      - name: Normalize _remote.repositories before saving cache
        shell: bash
        run: |
          # Same strict mode as the other script steps in this job.
          set -euo pipefail

          # Replace 'jfrog-central' with 'central' in _remote.repositories files.
          # These files track which repo ID each artifact was downloaded from. The
          # cache warmer downloads from 'jfrog-central' (the JFrog mirror), but
          # Maven's offline mode expects artifacts to be associated with 'central'
          # (the default Maven Central repo ID). Without this, offline mode refuses
          # cached artifacts with "has not been downloaded from it before".
          REPO_DIR="${HOME}/.m2/repository"
          COUNT=$(find "${REPO_DIR}" -name '_remote.repositories' -print | wc -l)
          # '{} +' passes many files to each sed invocation instead of starting one
          # sed process per marker file.
          find "${REPO_DIR}" -name '_remote.repositories' -exec sed -i 's/jfrog-central/central/g' {} +
          echo "Normalized ${COUNT} _remote.repositories markers (jfrog-central -> central)"

      # Stores ~/.m2/repository in the Actions cache under a key derived from the
      # hash of every pom.xml in the workspace.
      # NOTE(review): cache entries are immutable per key — if an entry for this
      # pom.xml hash already exists, the save does not replace it. Confirm the
      # scheduled run gives the intended protection against cache eviction.
      - name: Save Maven dependency cache
        uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
        with:
          key: maven-deps-${{ hashFiles('**/pom.xml') }}
          path: ~/.m2/repository