Skip to content

Commit 01663e3

Browse files
authored
Merge pull request #23606 from Flamefire/20250808105302_new_pr_nvidia-cutlass3800
{lib}[gfbf/2024a] nvidia-cutlass v3.8.0.0 w/ CUDA 12.6.0
2 parents a59f288 + 93c5a08 commit 01663e3

2 files changed

Lines changed: 105 additions & 0 deletions

File tree

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
easyblock = 'PythonBundle'
2+
3+
name = 'nvidia-cutlass'
4+
version = '3.8.0.0'
5+
versionsuffix = '-CUDA-%(cudaver)s'
6+
7+
homepage = 'https://pypi.org/project/nvidia-cutlass'
8+
description = """
9+
CUTLASS is a collection of CUDA C++ template abstractions for implementing high-performance
10+
matrix-matrix multiplication (GEMM) and related computations at all levels and scales within CUDA.
11+
It incorporates strategies for hierarchical decomposition and data movement similar to those used
12+
to implement cuBLAS and cuDNN.
13+
CUTLASS decomposes these "moving parts" into reusable, modular software components abstracted by C++ template classes.
14+
Primitives for different levels of a conceptual parallelization hierarchy can be specialized and tuned
15+
via custom tiling sizes, data types, and other algorithmic policy.
16+
The resulting flexibility simplifies their use as building blocks within custom kernels and applications.
17+
"""
18+
19+
toolchain = {'name': 'gfbf', 'version': '2024a'}
20+
21+
builddependencies = [
22+
('poetry', '1.8.3'),
23+
]
24+
25+
dependencies = [
26+
('CUDA', '12.6.0', '', SYSTEM),
27+
('CUDA-Python', '12.6.2.post1', versionsuffix),
28+
('Python', '3.12.3'),
29+
('Python-bundle-PyPI', '2024.06'),
30+
('SciPy-bundle', '2024.05'),
31+
('networkx', '3.4.2'),
32+
('pydot', '3.0.3'),
33+
]
34+
35+
exts_list = [
36+
('treelib', '1.8.0', {
37+
'sources': [SOURCE_TAR_GZ],
38+
'checksums': ['e1be2c6b66ffbfae85079fc4c76fb4909946d01d915ee29ff6795de53aed5d55'],
39+
}),
40+
(name, version, {
41+
'source_tmpl': 'nvidia_cutlass-%(version)s-py3-none-any.whl',
42+
'post_install_patches': [{
43+
'name': 'nvidia-cutlass-3.8.0.0_fix-BytesWarning.patch',
44+
'sourcepath': 'lib/python%(pyshortver)s/site-packages/cutlass',
45+
'level': 3,
46+
}],
47+
'checksums': [
48+
'013147221a63500205da233ae02e6262463917f3fe39cb09efbca37bfd1c39f9',
49+
{'nvidia-cutlass-3.8.0.0_fix-BytesWarning.patch':
50+
'63eb47894340c0ea03d0d2faaa49c1979915f903b5bc2ced17f8e0dd5ab854ed'},
51+
],
52+
'modulename': 'cutlass',
53+
}),
54+
]
55+
56+
sanity_check_commands = [
57+
'python -sc "import cutlass_library"',
58+
'python -bb -sc "' + '; '.join((
59+
'import cutlass',
60+
# These serve as a smoke test, e.g. the unpatched nvcc_version() was incompatible with -bb
61+
"assert cutlass.nvcc_version().startswith('%(cudamajver)s')",
62+
"assert cutlass.cuda_install_path() == '$EBROOTCUDA'",
63+
)) + '"',
64+
]
65+
66+
moduleclass = 'lib'
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
When neither `text` nor `encoding` is specified to `subprocess.run` then `stdout` will be of type `bytes`.
2+
A subsequent `str(stdout)` causes a `BytesWarning` which might result in errors, e.g. if `python -bb` is used.
3+
See https://github.com/NVIDIA/cutlass/pull/2682
4+
5+
Fixes hard failures in PyTorch tests that do use `-bb`.
6+
7+
Author: Alexander Grund (TU Dresden)
8+
9+
diff --git a/python/cutlass_cppgen/__init__.py b/python/cutlass_cppgen/__init__.py
10+
index 9bdd259c02..0e28ff55fd 100644
11+
--- a/python/cutlass_cppgen/__init__.py
12+
+++ b/python/cutlass_cppgen/__init__.py
13+
@@ -39,11 +39,11 @@
14+
def _cuda_install_path_from_nvcc() -> str:
15+
import subprocess
16+
# Attempt to detect CUDA_INSTALL_PATH based on location of NVCC
17+
- result = subprocess.run(['/usr/bin/which', 'nvcc'], capture_output=True)
18+
+ result = subprocess.run(['/usr/bin/which', 'nvcc'], capture_output=True, text=True)
19+
if result.returncode != 0:
20+
raise Exception(f'Unable to find nvcc via `which` utility.')
21+
22+
- cuda_install_path = result.stdout.decode('utf-8').split('/bin/nvcc')[0]
23+
+ cuda_install_path = result.stdout.split('/bin/nvcc')[0]
24+
if not os.path.isdir(cuda_install_path):
25+
raise Exception(f'Environment variable "CUDA_INSTALL_PATH" is not defined, '
26+
f'and default path of {cuda_install_path} does not exist.')
27+
@@ -63,10 +63,10 @@ def nvcc_version():
28+
import subprocess
29+
30+
# Attempt to get NVCC version
31+
- result = subprocess.run(['nvcc', '--version'], capture_output=True)
32+
+ result = subprocess.run(['nvcc', '--version'], capture_output=True, text=True)
33+
if result.returncode != 0:
34+
raise Exception('Unable to run `nvcc --version')
35+
- _NVCC_VERSION = str(result.stdout).split(" release ")[-1].split(",")[0]
36+
+ _NVCC_VERSION = result.stdout.split(" release ")[-1].split(",")[0]
37+
return _NVCC_VERSION
38+
39+
_CUDA_INSTALL_PATH = None

0 commit comments

Comments
 (0)