Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion libvmaf/src/cuda/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#ifndef __VMAF_SRC_CUDA_COMMON_H__
#define __VMAF_SRC_CUDA_COMMON_H__

#include <pthread.h>
#include <stdbool.h>

#include "config.h"
Expand Down
4 changes: 4 additions & 0 deletions libvmaf/src/cuda/cuda_helper.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,11 @@

#include "assert.h"
#include "stdio.h"
#ifdef DEVICE_CODE
#include <cuda.h>
#else
#include <ffnvcodec/dynlink_loader.h>
#endif

#define DIV_ROUND_UP(x, y) (((x) + (y)-1) / (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
Expand Down
2 changes: 2 additions & 0 deletions libvmaf/src/feature/cuda/integer_adm/adm_cm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
* limitations under the License.
*
*/
#ifndef DEVICE_CODE
#include "feature_collector.h"
#endif
#include "cuda/integer_adm_cuda.h"
#include "common.h"
#include "cuda_helper.cuh"
Expand Down
2 changes: 2 additions & 0 deletions libvmaf/src/feature/cuda/integer_adm/adm_csf.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
*
*/

#ifndef DEVICE_CODE
#include "feature_collector.h"
#endif
#include "cuda/integer_adm_cuda.h"

#include "common.h"
Expand Down
2 changes: 2 additions & 0 deletions libvmaf/src/feature/cuda/integer_adm/adm_csf_den.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
*
*/

#ifndef DEVICE_CODE
#include "feature_collector.h"
#endif
#include "cuda/integer_adm_cuda.h"

#include "common.h"
Expand Down
2 changes: 2 additions & 0 deletions libvmaf/src/feature/cuda/integer_adm/adm_decouple.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
*
*/

#ifndef DEVICE_CODE
#include "feature_collector.h"
#endif
#include "cuda/integer_adm_cuda.h"

#include "common.h"
Expand Down
2 changes: 2 additions & 0 deletions libvmaf/src/feature/cuda/integer_adm/adm_dwt2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
*
*/

#ifndef DEVICE_CODE
#include "feature_collector.h"
#endif
#include "cuda/integer_adm_cuda.h"

#include "common.h"
Expand Down
2 changes: 2 additions & 0 deletions libvmaf/src/feature/integer_adm.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,12 @@ struct dwt_model_params {
};

// 0 -> Y, 1 -> Cb, 2 -> Cr
#ifndef __CUDACC__
/*
 * Per-channel DWT model parameters, one entry per plane
 * (index 0 -> Y, 1 -> Cb, 2 -> Cr). Each entry sets the fields a, k, f0
 * and the four-element array g of struct dwt_model_params.
 *
 * The table is only compiled when __CUDACC__ is not defined.
 * NOTE(review): presumably it is hidden from nvcc because the designated
 * initializers below are not accepted when this header is compiled as
 * CUDA C++ -- confirm.
 */
static const struct dwt_model_params dwt_7_9_YCbCr_threshold[3] = {
{.a = 0.495, .k = 0.466, .f0 = 0.401, .g = {1.501, 1.0, 0.534, 1.0}},
{.a = 1.633, .k = 0.353, .f0 = 0.209, .g = {1.520, 1.0, 0.502, 1.0}},
{.a = 0.944, .k = 0.521, .f0 = 0.404, .g = {1.868, 1.0, 0.516, 1.0}}};
#endif

/*
* The following dwt basis function amplitudes, A(lambda,theta), are taken from
Expand Down
80 changes: 70 additions & 10 deletions libvmaf/src/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,12 @@ is_asm_enabled = get_option('enable_asm') == true
is_cuda_enabled = get_option('enable_cuda') == true
is_avx512_enabled = get_option('enable_avx512') == true
is_nvtx_enabled = get_option('enable_nvtx') == true
# NVTX cannot be used on Windows hosts: warn once and force the option off so
# that the HAVE_NVTX handling further down sees a consistent value.
nvtx_requested_on_windows = host_machine.system() == 'windows' and is_nvtx_enabled
if nvtx_requested_on_windows
    warning('NVTX is not supported on Windows, disabling.')
    is_nvtx_enabled = false
endif

if is_nvtx_enabled
if is_nvtx_enabled
cdata.set10('HAVE_NVTX', is_nvtx_enabled)
endif
if is_cuda_enabled
Expand Down Expand Up @@ -310,23 +314,80 @@ if is_cuda_enabled
]
gencode = []
if get_option('enable_nvcc')
cuda_lang = add_languages('cuda', required : true)
cuda_compiler = meson.get_compiler('cuda')
nvcc_exe = find_program('nvcc')

# On Windows, nvcc requires MSVC's cl.exe as host compiler for preprocessing.
# We avoid adding cl.exe to PATH as that causes meson to pick MSVC as the
# default C compiler. Use vswhere + powershell to find cl.exe automatically.
if host_machine.system() == 'windows'
    # Step 1: locate cl.exe. Prefer the newest Visual Studio installation
    # reported by vswhere; fall back to whatever 'cl' is reachable via PATH.
    cl_path = ''
    cl_find_result = run_command('powershell', '-NoProfile', '-Command',
        '(Get-ChildItem -Path (& "${env:ProgramFiles(x86)}/Microsoft Visual Studio/Installer/vswhere.exe" -latest -products * -property installationPath) -Recurse -Filter cl.exe -ErrorAction SilentlyContinue | Where-Object { $_.DirectoryName -like "*HostX64*x64*" } | Select-Object -First 1).FullName',
        check: false)
    if cl_find_result.returncode() == 0
        cl_path = cl_find_result.stdout().strip()
    endif

    if cl_path != ''
        message('Found MSVC cl.exe at: ' + cl_path)
    else
        cl_exe = find_program('cl', required: false)
        if not cl_exe.found()
            error('MSVC cl.exe not found. nvcc on Windows requires Visual Studio Build Tools.')
        endif
        # cl_path must be set on the fallback path as well: it is used below
        # to derive the MSVC include directory. Leaving it undefined makes
        # meson abort with an unknown-variable error.
        cl_path = cl_exe.full_path()
    endif
    nvcc_ccbin_flags = ['--allow-unsupported-compiler', '-ccbin', cl_path]

    # Step 2: find MSVC and Windows SDK include dirs so cl.exe can find system
    # headers when invoked by nvcc outside a vcvars environment.
    # They are passed as -I flags directly to nvcc.
    # The MSVC include path is derived from cl.exe's location to ensure the
    # headers match the compiler version.
    msvc_include_result = run_command('powershell', '-NoProfile', '-Command',
        '$clPath = "' + cl_path + '"; ' +
        '$msvcRoot = ($clPath -replace "\\\\bin\\\\.*$", ""); ' +
        'Write-Output $msvcRoot',
        check: false)
    winsdk_include_result = run_command('powershell', '-NoProfile', '-Command',
        '$sdkInc = Get-ChildItem "${env:ProgramFiles(x86)}/Windows Kits/10/Include" -Directory | Sort-Object Name | Select-Object -Last 1; ' +
        'Write-Output $sdkInc.FullName',
        check: false)

    nvcc_host_includes = []
    msvc_root_dir = ''
    winsdk_inc_dir = ''
    if msvc_include_result.returncode() == 0 and winsdk_include_result.returncode() == 0
        msvc_root_dir = msvc_include_result.stdout().strip()
        winsdk_inc_dir = winsdk_include_result.stdout().strip()
    endif

    # An empty result (e.g. no Windows SDK installed) must not be turned into
    # bogus relative include paths such as '-I ucrt'.
    if msvc_root_dir != '' and winsdk_inc_dir != ''
        nvcc_host_includes = [
            '-I', msvc_root_dir / 'include',
            '-I', winsdk_inc_dir / 'ucrt',
            '-I', winsdk_inc_dir / 'shared',
            '-I', winsdk_inc_dir / 'um',
        ]
        message('MSVC include: ' + msvc_root_dir / 'include')
        message('UCRT include: ' + winsdk_inc_dir / 'ucrt')
    else
        warning('Could not find MSVC/Windows SDK include paths. nvcc compilation may fail.')
    endif
else
    # Non-Windows hosts: nvcc uses its default host compiler and include paths.
    nvcc_ccbin_flags = []
    nvcc_host_includes = []
endif

# Detect CUDA version from nvcc directly instead of add_languages('cuda'),
# which requires MSVC on Windows and fails with MinGW.
# nvcc --version outputs: "...Cuda compilation tools, release 12.6, V12.6.85..."
# Run nvcc once and extract the "release X.Y" token from its version banner.
nvcc_ver_out = run_command(nvcc_exe, '--version', check: true)
nvcc_ver_text = nvcc_ver_out.stdout().strip()
if not nvcc_ver_text.contains('release ')
    # Fail with a readable message instead of an index error from split()[1].
    error('Could not parse CUDA version from nvcc --version output: ' + nvcc_ver_text)
endif
cuda_version = nvcc_ver_text.split('release ')[1].split(',')[0].strip()
message('Found CUDA version = @0@'.format(cuda_version))

gencode = [
'--fatbin',
'-gencode=arch=compute_75,code=sm_75',
'-gencode=arch=compute_80,code=sm_80',
]
message('Found CUDA version = @0@'.format(cuda_compiler.version()))
if cuda_compiler.version().version_compare('<13')
if cuda_version.version_compare('<13')
gencode += '-gencode=arch=compute_50,code=compute_50'
endif
# Always compile device code to enable quick startup on newer GPUs; for the last supported GPU, also generate PTX for future compatibility.
if cuda_compiler.version().version_compare('>11.8')
if cuda_version.version_compare('>11.8')
gencode += '-gencode=arch=compute_90,code=sm_90'
if cuda_compiler.version().version_compare('>12.8')
if cuda_version.version_compare('>12.8')
gencode += [
'-gencode=arch=compute_100,code=sm_100',
'-gencode=arch=compute_120,code=sm_120',
Expand All @@ -353,7 +414,7 @@ if is_cuda_enabled
message('CUDA gencode = @0@'.format(gencode))


cuda_flags = []
cuda_flags = ['-D_USE_MATH_DEFINES']
if get_option('buildtype') == 'debug'
cuda_flags += ['-DCUDA_DEBUG', '-G']
else
Expand All @@ -378,7 +439,7 @@ if is_cuda_enabled
'-I', '../src/feature',
'-I', '../src/' + cuda_dir,
'-DDEVICE_CODE',
] + cuda_flags
] + cuda_flags + nvcc_ccbin_flags + nvcc_host_includes
)
ptx_files += {name : [t]}
endforeach
Expand Down Expand Up @@ -414,7 +475,6 @@ if is_cuda_enabled
cuda_inc,
],
c_args : vmaf_cflags_common,
cuda_args: cuda_flags # + ['-gencode', 'arch=compute_86,code=sm_86' ] #, '--use_fast_math']
)

common_cuda_objects += cuda_static_lib.extract_all_objects()
Expand Down
5 changes: 5 additions & 0 deletions libvmaf/src/picture.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,14 @@
#define __VMAF_SRC_PICTURE_H__

#ifdef HAVE_CUDA
#ifdef DEVICE_CODE
#include <cuda.h>
typedef struct VmafCudaState VmafCudaState;
#else
#include <ffnvcodec/dynlink_cuda.h>
#include "libvmaf/libvmaf_cuda.h"
#endif
#endif
#include "libvmaf/picture.h"

enum VmafPictureBufferType {
Expand Down