|
18 | 18 | - '**.py' |
19 | 19 | - 'pyproject.toml' |
20 | 20 | - 'requirements**.txt' |
| 21 | + - 'constraints-dev.txt' |
21 | 22 | - '.github/workflows/e2e-nvidia-l4-x1.yml' # This workflow |
22 | 23 | workflow_dispatch: |
23 | 24 |
|
@@ -112,45 +113,29 @@ jobs: |
112 | 113 | - name: Install ilab |
113 | 114 | working-directory: ./instructlab |
114 | 115 | run: | |
115 | | - export CUDA_HOME="/usr/local/cuda" |
116 | | - export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64" |
117 | | - export PATH="$PATH:$CUDA_HOME/bin" |
118 | | - python3.11 -m venv --upgrade-deps venv |
119 | | - . venv/bin/activate |
120 | | - nvidia-smi |
121 | | - python3.11 -m pip cache remove llama_cpp_python |
122 | | -
|
123 | | - pip_install="python3.11 -m pip install -v -c constraints-dev.txt" |
124 | | -
|
125 | | - pip_install="python3.11 -m pip install -v -c constraints-dev.txt" |
126 | | -
|
127 | | - # pre-install some build dependencies |
128 | | - $pip_install packaging wheel setuptools-scm |
129 | | -
|
130 | | - # flash-attn has a bug in the setup.py that causes pip to attempt installing it |
131 | | - # before torch is installed. This is a bug because their setup.py depends on |
132 | | - # importing the module, so it should have been listed in build_requires. Alas! |
133 | | - # |
134 | | - # See: https://github.com/Dao-AILab/flash-attention/pull/958 |
135 | | - # Also: https://github.com/instructlab/instructlab/issues/1821 |
136 | | - # |
137 | | - # first, pre-install flash-attn build dependencies |
138 | | - $pip_install torch packaging setuptools wheel psutil ninja |
139 | | -
|
140 | | - # now build flash-attn using the pre-installed build dependencies; this will |
141 | | - # guarantee that the build version of torch will match the runtime version of |
142 | | - # torch; otherwise, all kinds of problems may occur, like missing symbols when |
143 | | - # accessing C extensions and such |
144 | | - $pip_install flash-attn --no-build-isolation |
145 | | -
|
146 | | - CMAKE_ARGS="-DGGML_CUDA=on" $pip_install . |
147 | | - $pip_install .[cuda] -r requirements-vllm-cuda.txt |
| 116 | + PYTHON=python3.11 ./scripts/install-ilab-with-cuda.sh |
148 | 117 | |
149 | 118 | - name: Update instructlab-eval library |
150 | 119 | working-directory: ./eval |
151 | 120 | run: | |
152 | 121 | . ../instructlab/venv/bin/activate |
153 | | - pip install -v . |
| 122 | + # Patch out our own pin from the ilab repo constraints file |
| 123 | + ilab_constraints=../instructlab/constraints-dev.txt |
| 124 | + sed -i '/instructlab-eval==/d' $ilab_constraints |
| 125 | +
|
| 126 | + # Since we reuse the virtual environment prepared using ilab |
| 127 | + # constraints, we should stick to the same constraints when |
| 128 | + # installing latest eval. |
| 129 | + # |
| 130 | + # FIXME: this is not ideal; a proper fix would require decoupling the |
| 131 | + # two repos in CI: either by removing the job completely and relying |
| 132 | + # on "sdk" (no ilab) test runs; or by preparing a separate |
| 133 | + # constraints file that would consider both the requirements files |
| 134 | + # for the eval library AND for the ilab - so that they are |
| 135 | + # consistent. |
| 136 | + pip_install="pip install -c $ilab_constraints" |
| 137 | + $pip_install . |
| 138 | + $pip_install .[cuda] |
154 | 139 |
|
155 | 140 | - name: Run e2e test |
156 | 141 | working-directory: ./instructlab |
|
0 commit comments