Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 12 additions & 27 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,43 +13,28 @@ jobs:
fail-fast: false
matrix:
python-version:
# - 3.6
- 3.9
# - 3.8
- "3.11"
os:
- "ubuntu-latest"

runs-on: "${{ matrix.os }}"

# use bash everywhere
defaults:
run:
shell: "bash -l {0}"

steps:
- name: "Checkout code"
uses: "actions/checkout@v2"
uses: "actions/checkout@v4"

- name: "Cache conda"
uses: "actions/cache@v1"
env:
# Increase this value to reset cache if env.yml has not changed
CACHE_NUMBER: 0
- name: "Setup Python"
uses: "actions/setup-python@v5"
with:
path: "~/conda_pkgs_dir"
key: "${{ matrix.os }}-conda-${{ matrix.python-version }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('enviroment.yml') }}"
python-version: "${{ matrix.python-version }}"

- name: "Setup conda"
uses: "conda-incubator/setup-miniconda@v2"
- name: "Setup uv"
uses: "astral-sh/setup-uv@v6"
with:
activate-environment: "DeepForest"
environment-file: "environment.yml"
python-version: "${{ matrix.python-version }}"
channels: conda-forge,spyder-ide
allow-softlinks: true
channel-priority: flexible
show-channel-urls: true
use-only-tar-bz2: true
enable-cache: true

- name: "Install dependencies"
run: "uv sync --extra dev"

- name: "Run tests"
run: "pytest -v"
run: "uv run pytest -v"
16 changes: 14 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
config.local.yml
.smoke_train_overrides.yml
results/**
!results/.gitkeep

.DS_Store
project.wpr
project.wpu
*.h5
__pycache__
data/raw/
data/processed/
# Large or machine-local artifacts (keep README / .gitkeep under data/*)
data/processed/**
data/external/**
!data/external/.gitkeep
!data/interim/.gitkeep
data/raw/**/*.csv
data/raw/**/*.zip
!data/raw/README.md
*.tif
*.png
!docs/figures/*.png
*.wpr
*.wpu
268 changes: 128 additions & 140 deletions README.md

Large diffs are not rendered by default.

38 changes: 38 additions & 0 deletions SLURM/crown_plot_array.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/bash
#SBATCH --job-name=dt_crown_plot
#SBATCH --account=ewhite
#SBATCH --cpus-per-task=2
#SBATCH --mem=16G
#SBATCH --time=04:00:00
#SBATCH --output=logs/crown_plot_%A_%a.out
#SBATCH --error=logs/crown_plot_%A_%a.err
##SBATCH --array=1-50%10

# Run src.pipelines.crown_one_plot for a single plot; meant to be one task of
# a SLURM job array.
#
# Each task reads line $SLURM_ARRAY_TASK_ID of PLOTS_FILE (one plotID per line).
# Build the list, e.g. when plotID is the first column of a comma-separated export:
#   awk -F, 'NR>1{print $1}' plots_export.csv | sort -u > plots.txt
# #SBATCH lines are not shell-expanded, so size the array on the command line:
#   sbatch --array=1-$(wc -l < plots.txt)%10 SLURM/crown_plot_array.sh
#
# Slurm opens the --output/--error files before this script starts, so the
# logs/ directory must already exist in the directory you submit from.
#
# Environment overrides (defaults in parentheses):
#   REPO_ROOT      repository checkout            ($HOME/DeepTreeAttention)
#   CONFIG_PATH    config read for rgb_sensor_pool ($REPO_ROOT/config.yml)
#   CANOPY_POINTS  canopy points shapefile        ($REPO_ROOT/data/interim/canopy_points.shp)
#   PLOTS_FILE     one plotID per line            ($REPO_ROOT/plots.txt)
#   CROWN_BOX_DIR  --savedir                      ($REPO_ROOT/data/interim/boxes)
#   RAW_BOX_DIR    --raw-box-savedir              ($REPO_ROOT/data/interim/raw_boxes)

set -euo pipefail

REPO_ROOT="${REPO_ROOT:-$HOME/DeepTreeAttention}"
# Exported because the inline Python below reads it from os.environ.
export CONFIG_PATH="${CONFIG_PATH:-$REPO_ROOT/config.yml}"
CANOPY_POINTS="${CANOPY_POINTS:-$REPO_ROOT/data/interim/canopy_points.shp}"
PLOTS_FILE="${PLOTS_FILE:-$REPO_ROOT/plots.txt}"

cd "$REPO_ROOT"
mkdir -p logs

# Abort with a hint when the script is run outside a job array.
LINE_NO="${SLURM_ARRAY_TASK_ID:?Set SLURM_ARRAY_TASK_ID or submit with sbatch --array}"

# Fail with a clear message instead of a bare sed error when the list is missing.
if [[ ! -r "$PLOTS_FILE" ]]; then
  echo "Plots file not found or not readable: ${PLOTS_FILE}" >&2
  exit 1
fi

# Pick the plotID on this task's line; an empty result means the array index
# is past the end of the file (or the line is blank).
PLOT="$(sed -n "${LINE_NO}p" "$PLOTS_FILE")"
if [[ -z "${PLOT}" ]]; then
  echo "No plot on line ${LINE_NO} of ${PLOTS_FILE}" >&2
  exit 1
fi

# Look up the RGB tile glob from the project config.
RGB_GLOB="$(uv run python -c "import os; from src import utils; print(utils.read_config(os.environ['CONFIG_PATH'])['rgb_sensor_pool'])")"

uv run python -m src.pipelines.crown_one_plot \
  --canopy-points "$CANOPY_POINTS" \
  --plot "$PLOT" \
  --rgb-glob "$RGB_GLOB" \
  --savedir "${CROWN_BOX_DIR:-$REPO_ROOT/data/interim/boxes}" \
  --raw-box-savedir "${RAW_BOX_DIR:-$REPO_ROOT/data/interim/raw_boxes}"
28 changes: 0 additions & 28 deletions SLURM/experiment.sh

This file was deleted.

30 changes: 30 additions & 0 deletions SLURM/osbs_inference.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#SBATCH --job-name=osbs_infer
#SBATCH --mail-type=END
#SBATCH --mail-user=benweinstein2010@gmail.com
#SBATCH --account=ewhite
#SBATCH --nodes=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=64GB
#SBATCH --time=48:00:00
#SBATCH --output=/home/b.weinstein/logs/osbs_inference_%j.out
#SBATCH --error=/home/b.weinstein/logs/osbs_inference_%j.err
#SBATCH --partition=gpu
#SBATCH --gpus=1

# OSBS tile inference (detection + species). Configure inference_osbs in config.yml, then submit.
#
# Environment overrides:
#   REPO_ROOT    repository checkout (default: $HOME/DeepTreeAttention)
#   CONFIG_PATH  config passed to --config (default: $REPO_ROOT/config.yml)
#
# NOTE(review): --mail-user and the --output/--error paths above are tied to
# one user account; adjust them before submitting as someone else.

set -euo pipefail

# Do not write core files.
ulimit -c 0

REPO_ROOT="${REPO_ROOT:-${HOME}/DeepTreeAttention}"
CONFIG_PATH="${CONFIG_PATH:-${REPO_ROOT}/config.yml}"

# Environment setup runs third-party shell code (module function, conda
# activation hooks) that can read unset variables; with nounset active that
# would abort the job, so relax it only for this section.
set +u
module load git gcc 2>/dev/null || true
source activate DeepTreeAttention
set -u

cd "${REPO_ROOT}"
# Put the repo root on the import path for `python -m src.…`.
export PYTHONPATH="${REPO_ROOT}:${PYTHONPATH:-}"

python -m src.pipelines.osbs_inference --config "${CONFIG_PATH}"
30 changes: 30 additions & 0 deletions SLURM/osbs_mortality.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#SBATCH --job-name=osbs_mortality
#SBATCH --mail-type=END
#SBATCH --mail-user=benweinstein2010@gmail.com
#SBATCH --account=ewhite
#SBATCH --nodes=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=96GB
#SBATCH --time=72:00:00
#SBATCH --output=/home/b.weinstein/logs/osbs_mortality_%j.out
#SBATCH --error=/home/b.weinstein/logs/osbs_mortality_%j.err
#SBATCH --partition=gpu
#SBATCH --gpus=1

# OSBS tile-scale mortality comparison. Configure osbs_mortality in config.yml, then submit.
#
# Environment overrides:
#   REPO_ROOT    repository checkout (default: $HOME/DeepTreeAttention)
#   CONFIG_PATH  config passed to --config (default: $REPO_ROOT/config.yml)
#
# NOTE(review): --mail-user and the --output/--error paths above are tied to
# one user account; adjust them before submitting as someone else.

set -euo pipefail

# Do not write core files.
ulimit -c 0

REPO_ROOT="${REPO_ROOT:-${HOME}/DeepTreeAttention}"
CONFIG_PATH="${CONFIG_PATH:-${REPO_ROOT}/config.yml}"

# Environment setup runs third-party shell code (module function, conda
# activation hooks) that can read unset variables; with nounset active that
# would abort the job, so relax it only for this section.
set +u
module load git gcc 2>/dev/null || true
source activate DeepTreeAttention
set -u

cd "${REPO_ROOT}"
# Put the repo root on the import path for `python -m src.…`.
export PYTHONPATH="${REPO_ROOT}:${PYTHONPATH:-}"

python -m src.pipelines.osbs_mortality --config "${CONFIG_PATH}"
69 changes: 69 additions & 0 deletions SLURM/train_experiment.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/bin/bash
# Submit a queued GPU training run with a stable experiment identity for Comet.
#
# Usage (from login node, repo checked out on shared FS):
#   cd /path/to/DeepTreeAttention
#   export EXPERIMENT_NAME=osbs-epoch70-bs128-$(date +%Y%m%d)
#   # optional: export DEEPTREE_OVERRIDES=... DEEPTREE_CONFIG=...
#   sbatch SLURM/train_experiment.sh
#
# REPO_ROOT defaults to SLURM_SUBMIT_DIR (your cwd when you ran sbatch). Override
# if you submit from elsewhere: REPO_ROOT=/path/to/DeepTreeAttention sbatch ...
#
# Comet: set COMET_API_KEY (and optionally COMET_WORKSPACE) in the environment
# or load them from a secrets file before sbatch. DEEPTREE_EXPERIMENT_NAME is
# forwarded so the Comet UI name matches your SLURM intent.

#SBATCH --job-name=dt-train
#SBATCH --account=ewhite
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=64G
#SBATCH --time=12:00:00
# NOTE(review): this script previously requested --partition twice (gpu, then
# hpg-turin) and --gpus twice. Only the later partition is kept here; confirm
# hpg-turin is the intended target.
#SBATCH --partition=hpg-turin
#SBATCH --gpus=1
#SBATCH --output=/home/b.weinstein/logs/train_%x_%j.out
#SBATCH --error=/home/b.weinstein/logs/train_%x_%j.err

set -euo pipefail

# Resolve the repo root: explicit REPO_ROOT wins, else the sbatch submit dir.
REPO_ROOT="${REPO_ROOT:-${SLURM_SUBMIT_DIR:-}}"
if [[ -z "${REPO_ROOT}" ]]; then
  echo "[train_experiment] Set REPO_ROOT or run sbatch from the repo: cd .../DeepTreeAttention && sbatch SLURM/train_experiment.sh" >&2
  exit 1
fi
# train.py is used as the marker that REPO_ROOT really is the repo root.
if [[ ! -f "${REPO_ROOT}/train.py" ]]; then
  echo "[train_experiment] REPO_ROOT=${REPO_ROOT} is not the repo root (no train.py). cd into DeepTreeAttention or set REPO_ROOT." >&2
  exit 1
fi
cd "${REPO_ROOT}"

mkdir -p logs

# Experiment name exported for the training process and passed as
# --experiment-name (override with EXPERIMENT_NAME when submitting).
# SLURM_JOB_ID is defaulted so running outside Slurm does not trip `set -u`.
export DEEPTREE_EXPERIMENT_NAME="${EXPERIMENT_NAME:-train-${SLURM_JOB_ID:-local}}"

CONFIG_PATH="${DEEPTREE_CONFIG:-config.yml}"

# Assemble train.py arguments in one array that is never empty, so expanding
# it is safe under `set -u` on every bash version.
TRAIN_ARGS=(--config "${CONFIG_PATH}")
# Optional merged config fragment (same as train.py --overrides)
if [[ -n "${DEEPTREE_OVERRIDES:-}" ]]; then
  TRAIN_ARGS+=(--overrides "${DEEPTREE_OVERRIDES}")
fi
TRAIN_ARGS+=(--experiment-name "${DEEPTREE_EXPERIMENT_NAME}")

# Use uv if available (recommended); fall back to whatever python is on PATH.
if command -v uv >/dev/null 2>&1; then
  RUN=(uv run python train.py "${TRAIN_ARGS[@]}")
else
  RUN=(python train.py "${TRAIN_ARGS[@]}")
fi

echo "[train_experiment] SLURM_JOB_ID=${SLURM_JOB_ID:-}"
echo "[train_experiment] DEEPTREE_EXPERIMENT_NAME=${DEEPTREE_EXPERIMENT_NAME}"
echo "[train_experiment] REPO_ROOT=${REPO_ROOT}"
echo "[train_experiment] running: ${RUN[*]}"

# Replace the shell with the training process.
exec "${RUN[@]}"
16 changes: 8 additions & 8 deletions abundance.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#Plot abundance distribution
from concurrent.futures import ThreadPoolExecutor, as_completed
from glob import glob
import os
import pandas as pd
import geopandas as gpd
from src import start_cluster

client = start_cluster.start(cpus=75,mem_size="10GB")
_IO_WORKERS = min(32, (os.cpu_count() or 4) * 4)

##Same data

Expand Down Expand Up @@ -43,9 +43,9 @@ def read_shp(path):
print(files)
if len(files) == 0:
continue
counts = []
futures = client.map(read_shp,files)
counts = [x.result() for x in futures]
with ThreadPoolExecutor(max_workers=_IO_WORKERS) as ex:
futures = [ex.submit(read_shp, f) for f in files]
counts = [f.result() for f in as_completed(futures)]
total_counts = pd.Series()
for ser in counts:
total_counts = total_counts.add(ser, fill_value=0)
Expand Down Expand Up @@ -89,9 +89,9 @@ def read_shp(path):
print(files)
if len(files) == 0:
continue
counts = []
futures = client.map(read_shp,files)
counts = [x.result() for x in futures]
with ThreadPoolExecutor(max_workers=_IO_WORKERS) as ex:
futures = [ex.submit(read_shp, f) for f in files]
counts = [f.result() for f in as_completed(futures)]
total_counts = pd.Series()
for ser in counts:
total_counts = total_counts.add(ser, fill_value=0)
Expand Down
30 changes: 30 additions & 0 deletions config.smoke.example.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Smoke-test overrides: a deliberately tiny run to check the pipeline end to end.
# Copy to ``config.local.yml`` or pass ``--overrides config.smoke.example.yml`` with ``train.py``.
# If your ``use_data_commit`` folder has split CSVs like ``train_<hash>_['OSBS'].csv`` instead of
# ``train.csv`` / ``test.csv``, set ``processed_train_csv`` and ``processed_test_csv`` (relative to that folder).

epochs: 1
workers: 0
preload_images: false
batch_size: 32

# Lightning: keep runs tiny (metrics on partial data are not meaningful)
limit_train_batches: 2
limit_val_batches: 2
limit_predict_batches: 4

use_comet: false
checkpoint_dir: results/checkpoints

# CPU smoke (set accelerator: auto, devices: 1 for a short GPU check)
accelerator: cpu
devices: 1

# Faster crown detection during OSBS inference smoke (optional)
deepforest_dead_cropmodel_name: null

inference_osbs:
  tile_limit: 1
  predict_limit_batches: 2
  # After a smoke train, point this at results/checkpoints/<id>.pt
  # species_checkpoint: results/checkpoints/smoke.pt
18 changes: 18 additions & 0 deletions config.smoke.local.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Smoke-run overrides pinned to one processed data snapshot.

# Data selection. The hash and file names are quoted so they always load as
# strings; the CSV names need double quotes because they contain single quotes.
# NOTE(review): the CSV names are presumably resolved relative to the
# use_data_commit folder, as described in config.smoke.example.yml — confirm.
data_dir: 'data/processed'
use_data_commit: '4c02ae98bd774aa494fadb3508ae84ba'
processed_train_csv: "train_dd0adf605011f67ea3e3626231a9713a04a9e85e_['OSBS'].csv"
processed_test_csv: "test_dd0adf605011f67ea3e3626231a9713a04a9e85e_['OSBS'].csv"

# Run size: a single epoch over a couple of batches per stage.
epochs: 1
batch_size: 16
limit_train_batches: 2
limit_val_batches: 2
limit_predict_batches: 2

# Data loading.
workers: 0
preload_images: false

# Hardware.
accelerator: 'cpu'
devices: 1

# Logging and outputs.
use_comet: false
checkpoint_dir: 'results/checkpoints'
Loading
Loading