diff --git a/CMakeLists.txt b/CMakeLists.txt index a6aacfb..5e01282 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,10 @@ option(DECREASE_RANSAC_AREA "Do not use 10% tracks for RANSAC near the image bor option(CUVSLAM_BUILD_SHARED_LIB "Build shared library version of cuVSLAM" TRUE) option(USE_RERUN "Use Rerun for visualization" OFF) +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES "all" CACHE STRING "CUDA architectures to compile for (e.g. 87, 86, all)") +endif() + include(cmake/cuVSLAMUtils.cmake) setup_cuvslam_settings() diff --git a/README.md b/README.md index cf049e9..92b8962 100644 --- a/README.md +++ b/README.md @@ -145,6 +145,42 @@ make -j ``` 2. Update SRC & DST paths in `build_release.sh` +### Build natively on Jetson (aarch64) + +For building directly on a Jetson Orin device (e.g. Orin Nano, Orin NX, AGX Orin): + +1. Install build dependencies (JetPack provides CUDA runtime but not all dev packages): + ```bash + sudo apt-get update + sudo apt-get install g++ cmake git git-lfs python3-dev libcublas-dev-12-6 libcusolver-dev-12-6 + ``` + `libcublas-dev` and `libcusolver-dev` provide the headers, unversioned linker symlinks, and cmake config files needed at build time. JetPack only ships the runtime libraries by default. + +2. Clone the repository and pull LFS data (test images and datasets are stored with Git LFS): + ```bash + git clone https://github.com/nvidia-isaac/cuVSLAM.git + cd cuVSLAM + git lfs install + git lfs pull + ``` + +3. Set source and build paths (add to `~/.bashrc` for persistence): + ```bash + export CUVSLAM_SRC_DIR=~/cuVSLAM + export CUVSLAM_DST_DIR=~/cuVSLAM/build + ``` + +4. Build targeting your specific GPU architecture: + ```bash + ./build_release.sh --cuda_arch=87 + ``` + Use `--cuda_arch=87` for Orin Nano/NX/AGX (SM_87, Ampere). Omit for the default (`all` architectures). Building for a single architecture reduces binary size and compile time. + +5. 
Run tests to verify the build: + ```bash + ./build_release.sh --cuda_arch=87 --modules_test + ``` + ### Build on remote ARM Requires SSH access to the remote device. diff --git a/build_release.sh b/build_release.sh index 9c1944e..10194ff 100755 --- a/build_release.sh +++ b/build_release.sh @@ -14,6 +14,7 @@ # --build_lib Build cuvslam library and python bindings # --build_docs Build documentation # --build_type=TYPE Set CMake build type (Debug|Release[default]|RelWithDebInfo|MinSizeRel) +# --cuda_arch=ARCH Set CUDA architecture target (e.g. 87 for Orin Nano, default: all) # --jobs=N Set number of parallel jobs (default: 8) # # Environment variables (optional): @@ -35,6 +36,7 @@ APITESTS=false LIBBUILD=false BUILDDOCS=false BUILD_TYPE="Release" +CUDA_ARCH="" USE_RERUN=OFF SRC=/cuvslam/src @@ -69,6 +71,9 @@ while [ "$#" -gt 0 ]; do --jobs=*) MAKE_JOBS="${1#*=}" ;; + --cuda_arch=*) + CUDA_ARCH="${1#*=}" + ;; --build_type=*) BUILD_TYPE="${1#*=}" ;; @@ -95,14 +100,18 @@ set -v # echo each command mkdir -p $DST cd $DST -cmake -DUSE_RERUN=$USE_RERUN -DCMAKE_BUILD_TYPE=$BUILD_TYPE -S $SRC -B $DST +CMAKE_ARGS="-DUSE_RERUN=$USE_RERUN -DCMAKE_BUILD_TYPE=$BUILD_TYPE" +if [ -n "$CUDA_ARCH" ]; then + CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_CUDA_ARCHITECTURES=$CUDA_ARCH" +fi +cmake $CMAKE_ARGS -S $SRC -B $DST # Build all CMake targets regardless of the flags make -j${MAKE_JOBS} -C $DST # Step 1: Run module tests if is_true "$MODULETESTS"; then echo "Module tests executed." - GTEST_FILTER=-*SpeedUp* ctest --output-on-failure || exit 1 + GTEST_FILTER=-*SpeedUp*:*Speedup* ctest --output-on-failure || exit 1 else echo "Module tests skipped." 
fi diff --git a/libs/cuda_modules/cuda_kernels/CMakeLists.txt b/libs/cuda_modules/cuda_kernels/CMakeLists.txt index 20d91f4..7310599 100644 --- a/libs/cuda_modules/cuda_kernels/CMakeLists.txt +++ b/libs/cuda_modules/cuda_kernels/CMakeLists.txt @@ -31,15 +31,13 @@ set(SOURCES add_library(cuda_kernels STATIC ${SOURCES}) set_target_properties(cuda_kernels PROPERTIES - # CMAKE_CUDA_ARCHITECTURES is set to some unusable value by default, so we can't use it to choose architectures - # CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" + CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON ) target_compile_options(cuda_kernels PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--compiler-options=-fPIC,-fvisibility=hidden> - $<$<COMPILE_LANGUAGE:CUDA>:-arch=all> # instead of CMAKE_CUDA_ARCHITECTURES # Enable device debug info for compute-sanitizer support $<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:Debug>>:-G> $<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<NOT:$<CONFIG:Debug>>>:-lineinfo>