diff --git a/.github/workflows/submit_example.yaml b/.github/workflows/submit_example.yaml new file mode 100644 index 0000000..fd2bcb2 --- /dev/null +++ b/.github/workflows/submit_example.yaml @@ -0,0 +1,38 @@ +--- +# This workflow will install Python dependencies and run tests + +name: Unit test and code coverage + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.13'] + submission: ['example'] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + sudo apt-get update + sudo apt install libbz2-dev + python -m pip install --upgrade pip + pip install wheel + pip install . + pip install .[dev] + if [ -f requirements_${{ matrix.submission }}.txt ]; then pip install -r requirements_${{ matrix.submission }}.txt; fi + - name: Run unit tests with pytest + run: | + python -m pytest tests/test_${{ matrix.submission }}.py diff --git a/requirements_example.txt b/requirements_example.txt new file mode 100644 index 0000000..2681716 --- /dev/null +++ b/requirements_example.txt @@ -0,0 +1,2 @@ +pz-rail-base +pz-rail-sklearn diff --git a/tests/test_example.py b/tests/test_example.py new file mode 100644 index 0000000..82c46cc --- /dev/null +++ b/tests/test_example.py @@ -0,0 +1,251 @@ +import os +from pathlib import Path +import pytest + +from rail.core.data import TableHandle +from rail.estimation.algos import sklearn_neurnet +from rail.utils import catalog_utils + +from pz_data_challenge.taskset_1 import run_taskset_1 +from pz_data_challenge.taskset_2 import run_taskset_2 + +from pz_data_challenge import submit_utils + +SUBMISSION_NAME: str = "example" +SUBMISSION_URL: str = "https://s3df.slac.stanford.edu/people/echarles/submit_example.tgz" + +# don't change these +SUBMIT_DIR: str = f"submissions/{SUBMISSION_NAME}" +PUBLIC_AREA: str = "tests/public" + + +@pytest.fixture(name="setup_submit_area", scope="module") +def setup_submit_area(request: pytest.FixtureRequest) -> int: + + if not os.path.exists(SUBMIT_DIR): + submit_utils.download_and_extract_tar(SUBMISSION_URL, SUBMIT_DIR) + + def teardown_submit_area() -> None: + if not os.environ.get("NO_TEARDOWN"): + os.system(f"\\rm -rf {SUBMIT_DIR}") + + try: + os.makedirs(os.path.join(SUBMIT_DIR, "outputs_2")) + except Exception: + pass + + try: + os.makedirs(os.path.join(SUBMIT_DIR, "outputs_3")) + except Exception: + pass + + request.addfinalizer(teardown_submit_area) + + catalog_utils.load_yaml("tests/catalogs.yaml") + catalog_utils.apply("cardinal_roman_rubin") + + return 0 + + +def run_taskset_1_estimation_only( + model_file: str | Path, + test_file: str | Path, + output_file: str | Path, +) -> None: + """ + User supplied function to run estimation for task set 1 + + This function should use a model stored in model_file, which + is downloaded as part of the submission tar file. + + This function should write output data to output_file in qp + format. + + Parameters + ---------- + model_file: + Path to the model. This should be part of the submission + tar file. + test_file: + Path to the test file contains the photometric test data on + which the PZ estimation will be run + output_file: + Path to write the output data to. The output data should + be written in qp format. + """ + test_data = TableHandle("test", path=test_file) + estimator = sklearn_neurnet.SklNeurNetEstimator.make_stage( + name="estimate", + model=model_file, + output_mode="return", + ) + pz_out = estimator.estimate(test_data) + pz_out.data.ancil["object_id"] = test_data()["object_id"].astype(int) + pz_out.path = output_file + pz_out.write() + + +def run_taskset_1_training_and_estimation( + train_file: str | Path, + test_file: str | Path, + output_file: str | Path, +) -> None: + """ + User supplied function to run training and estimation for task set 1 + + This function should train a model and use it. + + This function should write output data to output_file in qp + format. + + Parameters + ---------- + train_file: + Path to the test file contains the photometric test data on + which the PZ estimation will be trained + test_file: + Path to the test file contains the photometric test data on + which the PZ estimation will be run + output_file: + Path to write the output data to. The output data should + be written in qp format. + """ + train_data = TableHandle("train", path=train_file) + test_data = TableHandle("test", path=test_file) + + informer = sklearn_neurnet.SklNeurNetInformer.make_stage( + name="inform", + ) + model = informer.inform(train_data) + + estimator = sklearn_neurnet.SklNeurNetEstimator.make_stage( + name="estimate", + model=model, + output_mode="return", + ) + pz_out = estimator.estimate(test_data) + pz_out.data.ancil["object_id"] = test_data()["object_id"].astype(int) + pz_out.path = output_file + pz_out.write() + + +def run_taskset_2_estimation_only( + model_file: str | Path, + test_file: str | Path, + output_file: str | Path, +) -> None: + """ + User supplied function to run estimation for task set 1 + + This function should use a model stored in model_file, which + is downloaded as part of the submission tar file. + + This function should write output data to output_file in qp + format. + + Parameters + ---------- + model_file: + Path to the model. This should be part of the submission + tar file. + test_file: + Path to the test file contains the photometric test data on + which the PZ estimation will be run + output_file: + Path to write the output data to. The output data should + be written in qp format. + """ + test_data = TableHandle("test", path=test_file) + estimator = sklearn_neurnet.SklNeurNetEstimator.make_stage( + name="estimate", + model=model_file, + output_mode="return", + ) + pz_out = estimator.estimate(test_data) + pz_out.data.ancil["object_id"] = test_data()["object_id"].astype(int) + pz_out.path = output_file + pz_out.write() + + +def run_taskset_2_training_and_estimation( + train_file: str | Path, + test_file: str | Path, + output_file: str | Path, +) -> None: + """ + User supplied function to run training and estimation for task set 1 + + This function should train a model and use it. + + This function should write output data to output_file in qp + format. + + Parameters + ---------- + test_file: + Path to the test file contains the photometric test data on + which the PZ estimation will be run + output_file: + Path to write the output data to. The output data should + be written in qp format. + """ + train_data = TableHandle("train", path=train_file) + test_data = TableHandle("test", path=test_file) + + informer = sklearn_neurnet.SklNeurNetInformer.make_stage( + name="inform", + ) + model = informer.inform(train_data) + + estimator = sklearn_neurnet.SklNeurNetEstimator.make_stage( + name="estimate", + model=model, + output_mode="return", + ) + pz_out = estimator.estimate(test_data) + pz_out.data.ancil["object_id"] = test_data()["object_id"].astype(int) + pz_out.path = output_file + pz_out.write() + + +def test_example_taskset_1( + setup_public_area: int, + setup_submit_area: int, +) -> None: + """ + Test fuction to validate a submisson for Taskset 1 + + You should not need to change this function + """ + + assert setup_public_area == 0 + assert setup_submit_area == 0 + + run_taskset_1( + PUBLIC_AREA, + SUBMISSION_NAME, + run_taskset_1_estimation_only, + run_taskset_1_training_and_estimation, + ) + + +def test_example_taskset_2( + setup_public_area: int, + setup_submit_area: int, +) -> None: + """ + Test fuction to validate a submisson for Taskset 1 + + You should not need to change this function + """ + + assert setup_public_area == 0 + assert setup_submit_area == 0 + + run_taskset_2( + PUBLIC_AREA, + SUBMISSION_NAME, + run_taskset_2_estimation_only, + run_taskset_2_training_and_estimation, + ) +