
Multihead arch cutlass int8 qkv average #2035

Draft — wants to merge 89 commits into base: master

Changes from all commits (89 commits)
aa865a2
WIP attention body changes
ankan-ban Mar 22, 2022
d6728dc
more updates to match training code
ankan-ban Mar 23, 2022
49a8143
fix few crashes
ankan-ban Mar 23, 2022
7d96c63
use the right encoder block for body!
ankan-ban Mar 23, 2022
f5fe737
fix output of AttentionBody
ankan-ban Mar 23, 2022
72cbc13
move pos encoding table to common header file
ankan-ban Mar 25, 2022
af2db3d
add hack to match training side bug
ankan-ban Mar 28, 2022
3c2639c
fix build error
ankan-ban Mar 28, 2022
410c4f9
remove hack for plies ply plane training side bug
ankan-ban Mar 29, 2022
b6c8e43
Fix attention body/head size
ankan-ban Sep 12, 2022
e0f94f0
Merge branch 'master' into attentionbody-cuda
almaudoh-1 Dec 14, 2022
844186b
Add input gating kernel.
almaudoh-1 Dec 15, 2022
780d47f
Completed input gating
almaudoh-1 Dec 15, 2022
f25f0bd
Add input gating, smolgen and sqrrelu.
almaudoh-1 Dec 24, 2022
e1cd35e
Fixed unstable softmax implementation.
almaudoh-1 Dec 24, 2022
b5d6930
Remove debug log
almaudoh-1 Dec 24, 2022
e9cda40
Tilp's fix for smolgen gemms.
almaudoh-1 Dec 25, 2022
d7a8adf
Merge branch 'master' into attentionbody-cuda
almaudoh-1 Dec 25, 2022
e98ef8d
Remove debug code
almaudoh-1 Dec 26, 2022
b5afc19
Add tilps perf improvement on existing attention qkv matmuls.
almaudoh-1 Dec 27, 2022
9f9304b
Fix cudnn build failures.
almaudoh-1 Dec 27, 2022
edbd8a8
Add tilps perf patch for fused smolgen weights add / softmax
almaudoh-1 Dec 28, 2022
f1f485a
Merge branch 'master' into attentionbody-cuda
almaudoh-1 Dec 28, 2022
d207abe
Fix errors in non-attentionbody nets.
almaudoh-1 Jan 8, 2023
70b0521
Add multistream support. Allow new attentionbody nets.
almaudoh-1 Jan 9, 2023
ba83ef8
Merge pull request #28 from almaudoh/attentionbody-cuda
ankan-ban Jan 14, 2023
32cf3a4
Merge branch 'LeelaChessZero:master' into attention-opts
ankan-ban Mar 4, 2023
eb184f4
add 8 elements per thread layernorm
ankan-ban Mar 4, 2023
6e0161a
Try fused MHA from cutlass
ankan-ban Mar 4, 2023
c62cf2d
first (somewhat) working INT8 attempt
ankan-ban Apr 24, 2023
c2e6349
clean up some unused/test code
ankan-ban Apr 24, 2023
a2eb7d2
try int8 for more layers
ankan-ban Apr 25, 2023
451cc30
per-column quantization for outputs
ankan-ban May 25, 2023
b9e6711
integrate changes from master for bigger layer norms
ankan-ban May 25, 2023
787090b
Common changes for new multiple head architecture.
almaudoh Sep 7, 2023
ef5f8e3
Merge branch 'master' into multihead-arch-common
almaudoh Sep 7, 2023
9cbc072
Fix typo bug.
almaudoh Sep 7, 2023
bf5d82a
Fix circleci failures.
almaudoh Sep 8, 2023
4157c4f
Remove no-single-branch
almaudoh Sep 8, 2023
9d7b0db
Implement new input encoding architecture.
almaudoh-1 Sep 12, 2023
f17621d
Merge remote-tracking branch 'upstream/master' into multihead-arch-cuda
almaudoh-1 Sep 12, 2023
9746ea4
Fix layer norms and add new multiple heads.
almaudoh-1 Sep 13, 2023
f720573
Add new method to get value error from neural net.
almaudoh-1 Sep 22, 2023
2cdb015
Merge branch 'multihead-arch-common' into multihead-arch-cuda
almaudoh-1 Sep 22, 2023
916b3fa
Refactor value head into separate class. Add short term value error h…
almaudoh-1 Sep 25, 2023
ad5390b
Add error to NodeToProcess.
almaudoh-1 Sep 25, 2023
5c7421a
Merge branch 'multihead-arch-common' into multihead-arch-cuda
almaudoh-1 Sep 25, 2023
bd34a6f
Fix bug in value head bias add.
almaudoh-1 Sep 29, 2023
bfbe14c
Support for multihead architecture in protobuf.
almaudoh-1 Oct 5, 2023
4cdfab3
Add backward compatibility adjustments to old nets to work in multihe…
almaudoh Oct 8, 2023
135c151
Merge branch 'multihead-arch-common' into multihead-arch-cuda
almaudoh-1 Oct 8, 2023
3c0ded9
Add backward compatibility.
almaudoh-1 Oct 8, 2023
c71d885
Merge branch 'attention-opts' into multihead-arch-cuda-cutlass-fmha
almaudoh-1 Jan 6, 2024
4333f15
Fix conflict resolution artifacts.
almaudoh-1 Jan 6, 2024
288a337
Fix omissions.
almaudoh-1 Jan 6, 2024
742567e
Merge branch 'master' into multihead-arch-cuda
almaudoh-1 Mar 1, 2024
316b85c
Remove short-term error value accessor.
almaudoh-1 Mar 2, 2024
284eb27
Remove old artifacts from network_legacy
almaudoh-1 Mar 2, 2024
d446d12
Fix backend to use MultiHeadWeights struct.
almaudoh-1 Mar 2, 2024
c38b568
File formatting.
almaudoh-1 Mar 2, 2024
eb26621
Fix layernorm epsilon for older attentionbody nets.
almaudoh-1 Mar 2, 2024
8a2009d
Minor comment fixes.
almaudoh-1 Mar 2, 2024
4ffee57
Change 'optimistic_st' key to 'optimistic' in policy head map.
almaudoh-1 Mar 3, 2024
ee81336
Switch cudnn to cuda for multiheadformat.
almaudoh-1 Mar 3, 2024
b14ad8b
Merge branch 'multihead-arch-cuda' into multihead-arch-cuda-cutlass-fmha
almaudoh-1 Mar 6, 2024
9a7bba2
Merge remote-tracking branch 'upstream/master' into multihead-arch-cu…
almaudoh-1 Mar 6, 2024
1275584
Merge remote-tracking branch 'upstream/master' into multihead-arch-cu…
almaudoh-1 Mar 7, 2024
e364f2b
Merge remote-tracking branch 'ankan/int8-expts' into multihead-cutlas…
almaudoh-1 Mar 9, 2024
58ae0ae
Fix buffer naming, fix source to build.
almaudoh-1 Mar 11, 2024
d0a8536
Remove value error head inference.
almaudoh-1 Mar 7, 2024
0a70093
Merge branch 'master' into multihead-arch-cutlass-int8
almaudoh Mar 23, 2024
405046a
Fix conflict resolution artefacts.
almaudoh Mar 24, 2024
451fbf3
WIP. Reworked int8 to use scaling factors stored in weights. Added ke…
almaudoh Apr 22, 2024
2682eef
Add additional scaling factor for matmul accumulator. Rename variable…
almaudoh May 1, 2024
0f61c2f
Remove debug outputs.
almaudoh May 3, 2024
71ec58b
Add quantization to embedding layer FFN. Add weights clipping for nor…
almaudoh May 5, 2024
01c24e5
Update gemms to provide int8->fp32 for correct results. Remove old an…
almaudoh May 8, 2024
09fcd56
Change gemms to int32 - wip
almaudoh May 12, 2024
868b9ac
Fix bugs in int8 implementation - ith extra (ssuper) pair of eyes fro…
almaudoh May 13, 2024
30bb640
Fix bugs in int8 implementation - with extra (super) pair of eyes fro…
almaudoh May 13, 2024
3201c0b
Merge branch 'multihead-arch-cutlass-int8' of https://github.com/alma…
almaudoh May 13, 2024
28912e6
Merge remote-tracking branch 'upstream/master' into multihead-arch-cu…
almaudoh May 13, 2024
e82761a
Fix promotion to double for clipMatrix.
almaudoh May 13, 2024
4e3a650
Fix scratch size and change epiloge compute to int32.
almaudoh May 13, 2024
dddc978
Fuse FFN2 quantize to FFN2 dequantize+bias-add. 2% speedup.
almaudoh May 13, 2024
adad545
Implement int8 in all gemms except QKV. Fuse dequant-bias + add+quant…
almaudoh May 16, 2024
870ebba
Remove epsilon from quantize
almaudoh May 16, 2024
80cc673
Split QKV to allow use of int8->int8 cutlass matmul.
almaudoh Jun 2, 2024
7fceeeb
Implement fused QKV with averaged scaling factors.
almaudoh Jun 2, 2024
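The commit titles reference int8 quantization with scaling factors stored in the weights, and the final commit fuses Q, K and V behind a single averaged scaling factor. The sketch below illustrates that general idea only — symmetric per-tensor int8 quantization with one shared (averaged) scale for the three fused matrices. It is not the PR's CUDA implementation; all names and shapes here are illustrative assumptions.

```python
import numpy as np

def int8_scale(x, qmax=127.0):
    """Symmetric per-tensor scale: maps max |x| onto the int8 range."""
    return max(np.abs(x).max() / qmax, 1e-12)

def quantize(x, scale):
    # Round to the nearest int8 step and clamp to the symmetric range.
    return np.clip(np.round(x / scale), -127, 127).astype(np.int8)

def dequantize(q, scale):
    return q.astype(np.float32) * scale

# Fusing Q, K and V into one int8 matmul forces a single shared scale;
# one simple compromise (as the last commit title suggests) is to
# average the three individual scales.
rng = np.random.default_rng(0)
q_w, k_w, v_w = (rng.standard_normal((64, 64)).astype(np.float32)
                 for _ in range(3))
avg_scale = float(np.mean([int8_scale(w) for w in (q_w, k_w, v_w)]))

q_q = quantize(q_w, avg_scale)
# Reconstruction error: rounding plus any clipping caused by sharing
# a scale that may be smaller than this matrix's own ideal scale.
err = float(np.abs(dequantize(q_q, avg_scale) - q_w).max())
```

The trade-off is visible in `err`: a shared scale smaller than a matrix's own per-tensor scale clips its largest values, which is the accuracy cost paid for fusing the three GEMMs into one.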
13 changes: 8 additions & 5 deletions build.cmd
@@ -2,7 +2,7 @@
 setlocal
 
 rem 1. Set the following for the options you want to build.
-set CUDNN=true
+set CUDNN=false
 set CUDA=true
 set DX12=false
 set OPENCL=false
@@ -11,10 +11,12 @@ set DNNL=false
 set OPENBLAS=false
 set EIGEN=false
 set TEST=false
+set CUTLASS=true
 
 rem 2. Edit the paths for the build dependencies.
-set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0
+set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.0
 set CUDNN_PATH=%CUDA_PATH%
+set CUTLASS_INCLUDE_PATH=C:\dev\cutlass-2.11.0\include
 set OPENBLAS_PATH=C:\OpenBLAS
 set MKL_PATH=C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl
 set DNNL_PATH=C:\dnnl_win_1.1.1_cpu_vcomp
@@ -34,13 +36,13 @@ if exist "C:\Program Files\Microsoft Visual Studio\2022" (
   where /q cl
   if errorlevel 1 call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
   set backend=vs2022
-) else if exist "C:\Program Files (x86)\Microsoft Visual Studio\2019" (
+) else if exist "D:\Program Files (x86)\Microsoft Visual Studio\2019" (
   where /q cl
-  if errorlevel 1 call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
+  if errorlevel 1 call "D:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
   set backend=vs2019
 ) else (
   where /q cl
-  if errorlevel 1 call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
+  if errorlevel 1 call "D:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
   set backend=vs2017
 )
@@ -63,6 +65,7 @@ meson build --backend %backend% --buildtype release -Ddx=%DX12% -Dcudnn=%CUDNN%
     -Dmkl_include="%MKL_PATH%\include" -Dmkl_libdirs="%MKL_PATH%\lib\intel64" -Ddnnl_dir="%DNNL_PATH%" ^
     -Dopencl_libdirs="%OPENCL_LIB_PATH%" -Dopencl_include="%OPENCL_INCLUDE_PATH%" ^
     -Dopenblas_include="%OPENBLAS_PATH%\include" -Dopenblas_libdirs="%OPENBLAS_PATH%\lib" ^
+    -Dcutlass_include="%CUTLASS_INCLUDE_PATH%" -Dcutlass="%CUTLASS%" ^
     -Ddefault_library=static
 
 if errorlevel 1 exit /b
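build.cmd only covers Windows. As a sketch of the equivalent Linux configure step using the new options this PR introduces (the CUTLASS checkout path is a placeholder assumption, not taken from the PR):

```shell
# Configure lc0 with the CUDA backend and the new CUTLASS options enabled.
# /opt/cutlass/include is a hypothetical path to a CUTLASS checkout.
meson setup build --buildtype=release \
    -Dplain_cuda=true -Dcudnn=false \
    -Dcutlass=true -Dcutlass_include=/opt/cutlass/include
ninja -C build
```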
2 changes: 1 addition & 1 deletion libs/lczero-common
Submodule lczero-common updated 1 file
+21 −0 proto/net.proto
14 changes: 14 additions & 0 deletions meson.build
@@ -485,6 +485,11 @@ if get_option('build_backends')
     cuda_arguments += ['-ccbin=' + get_option('nvcc_ccbin')]
   endif
   cuda_cc = get_option('cc_cuda') # Unfortunately option cuda_cc is reserved.
+  if get_option('cutlass')
+    add_project_arguments('-DUSE_CUTLASS', language : 'cpp')
+    cuda_arguments += ['-DUSE_CUTLASS']
+    cuda_arguments += ['-I', get_option('cutlass_include')]
+  endif
   nvcc_extra_args = []
   if cuda_cc != ''
     nvcc_extra_args = ['-arch=compute_' + cuda_cc, '-code=sm_' + cuda_cc]
@@ -522,6 +527,15 @@ if get_option('build_backends')
     depend_files: 'src/neural/cuda/winograd_helper.inc',
     command : [nvcc, nvcc_extra_args, cuda_arguments]
   )
+
+  if get_option('cutlass')
+    nvcc_cutlass_args = ['-arch=compute_80', '-code=sm_80']
+    files += custom_target('cuda cutlass code',
+      input : 'src/neural/cuda/cutlass_kernels.cu',
+      output : outputname,
+      command : [nvcc, nvcc_cutlass_args, cuda_arguments]
+    )
+  endif
 
   files += custom_target('cuda fp16 code',
     input : 'src/neural/cuda/fp16_kernels.cu',
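The cutlass custom_target above hard-codes SM 8.0 (Ampere) instead of the user-selected `cc_cuda`, consistent with the option description's "Ampere+" restriction. A rough standalone equivalent of what that target runs (include path and output file name are assumptions for illustration):

```shell
# Approximate nvcc invocation matching the 'cuda cutlass code' target above;
# /opt/cutlass/include and the .o name are hypothetical placeholders.
nvcc -c src/neural/cuda/cutlass_kernels.cu \
     -arch=compute_80 -code=sm_80 \
     -DUSE_CUTLASS -I /opt/cutlass/include \
     -o cutlass_kernels.o
```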
10 changes: 10 additions & 0 deletions meson_options.txt
@@ -43,6 +43,11 @@ option('cudnn_include',
        value: ['/opt/cuda/include/', '/usr/local/cuda/include/', '/usr/lib/cuda/include/'],
        description: 'Paths to cudnn include directory')
 
+option('cutlass_include',
+       type: 'string',
+       value: '/usr',
+       description: 'Paths to cutlass include directory')
+
 option('build_backends',
        type: 'boolean',
        value: true,
@@ -78,6 +83,11 @@ option('plain_cuda',
        value: true,
        description: 'Enable CUDA backend')
 
+option('cutlass',
+       type: 'boolean',
+       value: false,
+       description: 'Enable cutlass lib for cuda backend. Only supports Ampere+ right now')
+
 option('opencl',
        type: 'boolean',
        value: true,