diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 39e1d8bc9..119f667b5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,7 +3,8 @@ name: Tests on: [push, pull_request, workflow_dispatch] env: - LALE_DOWNLOAD_CACHE_DIR: ${{ github.workspace }}/.cache/data + DOWNLOAD_CACHE_DIR: /home/runner/scikit_learn_data + LALE_DOWNLOAD_CACHE_DIR: /home/runner/scikit_learn_data/lale jobs: static: @@ -19,17 +20,9 @@ jobs: - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: + cache: 'pip' + cache-dependency-path: setup.py python-version: ${{ matrix.python-version }} - - name: Cache pip - uses: actions/cache@v4 - with: - # This path is specific to Ubuntu - path: ${{ env.pythonLocation }} - # Look to see if there is a cache hit for the setup file - key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - restore-keys: | - ${{ runner.os }}-pip-new3 - ${{ runner.os }}-new3 - name: Install numpy run: pip install -U numpy - name: Install dependencies @@ -53,16 +46,8 @@ jobs: uses: actions/setup-python@v5 with: python-version: 3.9 - - name: Cache pip - uses: actions/cache@v4 - with: - # This path is specific to Ubuntu - path: ${{ env.pythonLocation }} - # Look to see if there is a cache hit for the setup file - key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - restore-keys: | - ${{ runner.os }}-pip-new3 - ${{ runner.os }}-new3 + cache: 'pip' + cache-dependency-path: setup.py - name: Install doc dependencies run: pip install -r docs/requirements.txt - name: Install lale @@ -89,16 +74,8 @@ jobs: uses: actions/setup-python@v5 with: python-version: '3.11' - - name: Cache pip - uses: actions/cache@v4 - with: - # This path is specific to Ubuntu - path: ${{ env.pythonLocation }} - # Look to see if there is a cache hit for the setup file - key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - 
restore-keys: | - ${{ runner.os }}-pip-new3 - ${{ runner.os }}-new3 + cache: 'pip' + cache-dependency-path: setup.py - name: Install numpy run: pip install -U numpy - name: Install dependencies @@ -112,18 +89,18 @@ jobs: - name: Cache test data restore uses: actions/cache/restore@v4 with: - path: ${{ env.LALE_DOWNLOAD_CACHE_DIR }} - key: ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} + path: ${{ env.DOWNLOAD_CACHE_DIR }} + key: ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} restore-keys: | - ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}- + ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}- ${{ runner.os }}-dcache-new3 - name: Download the test data run: python lale/datasets/prefetch.py - name: Cache test data save uses: actions/cache/save@v4 with: - path: ${{ env.LALE_DOWNLOAD_CACHE_DIR }} - key: ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} + path: ${{ env.DOWNLOAD_CACHE_DIR }} + key: ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} # test_matrix_pr and test_matrix_master have a lot of redundancy @@ -187,25 +164,17 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + cache: 'pip' + cache-dependency-path: setup.py - name: Install system packages run: sudo apt-get install graphviz swig - - name: Cache pip - uses: actions/cache@v4 - with: - # This path is specific to Ubuntu - path: ${{ env.pythonLocation }} - # Look to see if there is a cache hit for the setup file - key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - restore-keys: | - ${{ runner.os }}-pip-new3 - ${{ runner.os }}-new3 - name: Cache test data restore uses: actions/cache/restore@v4 with: - path: ${{ env.LALE_DOWNLOAD_CACHE_DIR }} - key: ${{ runner.os 
}}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} + path: ${{ env.DOWNLOAD_CACHE_DIR }} + key: ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} restore-keys: | - ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}- + ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}- ${{ runner.os }}-dcache-new3 - name: Install numpy run: pip install -U numpy @@ -285,25 +254,17 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + cache: 'pip' + cache-dependency-path: setup.py - name: Install system packages run: sudo apt-get install graphviz swig - - name: Cache pip - uses: actions/cache@v4 - with: - # This path is specific to Ubuntu - path: ${{ env.pythonLocation }} - # Look to see if there is a cache hit for the setup file - key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - restore-keys: | - ${{ runner.os }}-pip-new3 - ${{ runner.os }}-new3 - name: Cache test data restore uses: actions/cache/restore@v4 with: - path: ${{ env.LALE_DOWNLOAD_CACHE_DIR }} - key: ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} + path: ${{ env.DOWNLOAD_CACHE_DIR }} + key: ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} restore-keys: | - ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}- + ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}- ${{ runner.os }}-dcache-new3 - name: Install numpy run: pip install -U numpy @@ -343,25 +304,17 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + cache: 'pip' + cache-dependency-path: setup.py - name: Install system packages run: sudo apt-get install graphviz swig - - name: Cache pip - uses: actions/cache@v4 - with: - # This path is specific to Ubuntu - path: ${{ env.pythonLocation }} - # 
Look to see if there is a cache hit for the setup file - key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - restore-keys: | - ${{ runner.os }}-pip-new3 - ${{ runner.os }}-new3 - name: Cache test data restore uses: actions/cache/restore@v4 with: - path: ${{ env.LALE_DOWNLOAD_CACHE_DIR }} - key: ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} + path: ${{ env.DOWNLOAD_CACHE_DIR }} + key: ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} restore-keys: | - ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}- + ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}- ${{ runner.os }}-dcache-new3 - name: Install numpy run: pip install -U numpy @@ -413,23 +366,15 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Cache pip - uses: actions/cache@v4 - with: - # This path is specific to Ubuntu - path: ${{ env.pythonLocation }} - # Look to see if there is a cache hit for the setup file - key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - restore-keys: | - ${{ runner.os }}-pip-new3 - ${{ runner.os }}-new3 + cache: 'pip' + cache-dependency-path: setup.py - name: Cache test data restore uses: actions/cache/restore@v4 with: - path: ${{ env.LALE_DOWNLOAD_CACHE_DIR }} - key: ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} + path: ${{ env.DOWNLOAD_CACHE_DIR }} + key: ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} restore-keys: | - ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}- + ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}- ${{ runner.os }}-dcache-new3 - name: Install packages run: sudo apt-get install graphviz swig @@ -506,23 +451,15 @@ jobs: uses: actions/setup-python@v5 with: 
python-version: ${{ matrix.python-version }} - - name: Cache pip - uses: actions/cache@v4 - with: - # This path is specific to Ubuntu - path: ${{ env.pythonLocation }} - # Look to see if there is a cache hit for the setup file - key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - restore-keys: | - ${{ runner.os }}-pip-new3 - ${{ runner.os }}-new3 + cache: 'pip' + cache-dependency-path: setup.py - name: Cache test data restore uses: actions/cache/restore@v4 with: - path: ${{ env.LALE_DOWNLOAD_CACHE_DIR }} - key: ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} + path: ${{ env.DOWNLOAD_CACHE_DIR }} + key: ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} restore-keys: | - ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}- + ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}- ${{ runner.os }}-dcache-new3 - name: Install packages run: sudo apt-get install graphviz swig @@ -580,10 +517,10 @@ jobs: - name: Cache test data restore uses: actions/cache/restore@v4 with: - path: ${{ env.LALE_DOWNLOAD_CACHE_DIR }} - key: ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} + path: ${{ env.DOWNLOAD_CACHE_DIR }} + key: ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}-${{ hashFiles('lale/datasets/prefetch.py') }} restore-keys: | - ${{ runner.os }}-dcache-new3-${{ env.LALE_DOWNLOAD_CACHE_DIR }}- + ${{ runner.os }}-dcache-new3-${{ env.DOWNLOAD_CACHE_DIR }}- ${{ runner.os }}-dcache-new3 - name: Run test run: py.test -v --capture=tee-sys ${{matrix.test-case}} diff --git a/lale/datasets/prefetch.py b/lale/datasets/prefetch.py index ecbd69c4d..4759b14b0 100644 --- a/lale/datasets/prefetch.py +++ b/lale/datasets/prefetch.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sklearn.datasets + import lale.lib.aif360.datasets from lale.datasets.movie_review import load_movie_review from lale.datasets.multitable.fetch_datasets import fetch_go_sales_dataset @@ -93,6 +95,15 @@ def prefetch_data(): fetch_fairness_dbs() + sklearn.datasets.fetch_california_housing() + sklearn.datasets.load_digits() + sklearn.datasets.load_iris() + sklearn.datasets.fetch_20newsgroups() + sklearn.datasets.load_diabetes() + sklearn.datasets.fetch_covtype() + sklearn.datasets.fetch_openml(name="house_prices", as_frame=True) + sklearn.datasets.load_breast_cancer() + def main(): prefetch_data()