* Upgrade presets for FFmpeg 4.3.1, DNNL 1.5.1, SciPy 1.5.1, CUDA 11.0.2, NCCL 2.7.6, MXNet 1.7.0.rc0, Skia 2.80.0
bytedeco · Jul 16, 2020 · 0ed7959 · 0ed7959
1 parent 3717741
commit 0ed7959
Show file tree

Hide file tree

Showing 135 changed files with 4,850 additions and 1,440 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,7 +8,7 @@
  * Add support for Windows to presets for Qt ([issue #862](https://github.com/bytedeco/javacpp-presets/issues/862))
  * Fix JPMS modules for CUDA, ARPACK-NG, GSL, SciPy, Gym, MXNet ([pull #880](https://github.com/bytedeco/javacpp-presets/pull/880) and [pull #881](https://github.com/bytedeco/javacpp-presets/pull/881))
  * Build OpenBLAS with a `TARGET` even for `DYNAMIC_ARCH` to avoid SIGILL ([issue eclipse/deeplearning4j#8747](https://github.com/eclipse/deeplearning4j/issues/8747))
- * Upgrade presets for FFmpeg 4.3 ([pull #891](https://github.com/bytedeco/javacpp-presets/pull/891)), Arrow 0.17.1, Hyperscan 5.3.0, MKL-DNN 0.21.5, DNNL 1.5, OpenBLAS 0.3.10, CPython 3.7.8, NumPy 1.19.0, SciPy 1.5.0, Gym 0.17.2, CUDA 11.0, cuDNN 8.0.1, NCCL 2.7.3, TensorFlow 1.15.3, TensorRT 7.1, ONNX 1.7.0 ([pull #882](https://github.com/bytedeco/javacpp-presets/pull/882)), ONNX Runtime 1.3.1 ([pull #887](https://github.com/bytedeco/javacpp-presets/pull/887)), Qt 5.15.0, Skia 1.68.3, and their dependencies
+ * Upgrade presets for FFmpeg 4.3.1 ([pull #891](https://github.com/bytedeco/javacpp-presets/pull/891)), Arrow 0.17.1, Hyperscan 5.3.0, MKL-DNN 0.21.5, DNNL 1.5.1, OpenBLAS 0.3.10, CPython 3.7.8, NumPy 1.19.0, SciPy 1.5.1, Gym 0.17.2, CUDA 11.0.2, cuDNN 8.0.1, NCCL 2.7.6, MXNet 1.7.0.rc0, TensorFlow 1.15.3, TensorRT 7.1, ONNX 1.7.0 ([pull #882](https://github.com/bytedeco/javacpp-presets/pull/882)), ONNX Runtime 1.3.1 ([pull #887](https://github.com/bytedeco/javacpp-presets/pull/887)), Qt 5.15.0, Skia 2.80.0, and their dependencies
  * Add `FullOptimization.h` allowing users to fully optimize LLVM modules ([pull #869](https://github.com/bytedeco/javacpp-presets/pull/869))
 
 ### April 14, 2020 version 1.5.3

diff --git a/README.md b/README.md
@@ -140,10 +140,10 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
  * Tesseract 4.1.1  https://github.com/tesseract-ocr/tesseract
  * Caffe 1.0  https://github.com/BVLC/caffe
  * OpenPose 1.6.0  https://github.com/CMU-Perceptual-Computing-Lab/openpose
- * CUDA 11.0  https://developer.nvidia.com/cuda-downloads
+ * CUDA 11.0.x  https://developer.nvidia.com/cuda-downloads
    * cuDNN 8.0.x  https://developer.nvidia.com/cudnn
    * NCCL 2.7.x  https://developer.nvidia.com/nccl
- * MXNet 1.6.0  https://github.com/apache/incubator-mxnet
+ * MXNet 1.7.0  https://github.com/apache/incubator-mxnet
  * TensorFlow 1.15.x  https://github.com/tensorflow/tensorflow
  * TensorRT 7.x  https://developer.nvidia.com/tensorrt
  * The Arcade Learning Environment 0.6.x  https://github.com/mgbellemare/Arcade-Learning-Environment
@@ -152,7 +152,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
  * ONNX Runtime 1.3.x  https://github.com/microsoft/onnxruntime
  * LiquidFun  http://google.github.io/liquidfun/
  * Qt 5.15.x  https://download.qt.io/archive/qt/
- * Mono/Skia 1.68.x  https://github.com/mono/skia
+ * Mono/Skia 2.80.x  https://github.com/mono/skia
  * cpu_features 0.4.1  https://github.com/google/cpu_features
  * System APIs of the build environments:
    * Linux (glibc)  https://www.gnu.org/software/libc/

diff --git a/ci/build.cmd b/ci/build.cmd
@@ -41,6 +41,7 @@ if exist "%ProgramFiles%\NVIDIA GPU Computing Toolkit" (
     SET "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v11.0"
     SET "CUDA_PATH_V11_0=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v11.0"
     SET "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v11.0\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v11.0\libnvvp;%PATH%"
+    echo CUDA Version 11.0.182>"%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v11.0\version.txt"
 )
 
 echo Building for "%APPVEYOR_REPO_BRANCH%"

diff --git a/ci/ccache-cuda.patch b/ci/ccache-cuda.patch
diff --git a/ci/install-ppc.sh b/ci/install-ppc.sh
@@ -71,10 +71,10 @@ docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "update-ca-certificates -f"
 if [[ "$PROJ" =~ cuda ]]; then
    echo "Setting up for cuda build"
    cd $HOME/
-   curl -L http://developer.download.nvidia.com/compute/cuda/11.0.1/local_installers/cuda-repo-ubuntu1804-11-0-local_11.0.1-450.36.06-1_ppc64el.deb -o $HOME/cuda-repo-ubuntu1804-11-0-local_11.0.1-450.36.06-1_ppc64el.deb
+   curl -L http://developer.download.nvidia.com/compute/cuda/11.0.2/local_installers/cuda-repo-ubuntu1804-11-0-local_11.0.2-450.51.05-1_ppc64el.deb -o $HOME/cuda-repo-ubuntu1804-11-0-local_11.0.2-450.51.05-1_ppc64el.deb
    curl -L https://developer.download.nvidia.com/compute/redist/cudnn/v8.0.1/cudnn-11.0-linux-ppc64le-v8.0.1.13.tgz -o $HOME/cudnn-11.0-linux-ppc64le-v8.0.1.13.tgz
-   curl -L https://developer.download.nvidia.com/compute/redist/nccl/v2.7/nccl_2.7.3-1+cuda11.0_ppc64le.txz -o $HOME/nccl_ppc64le.txz
-   ar vx $HOME/cuda-repo-ubuntu1804-11-0-local_11.0.1-450.36.06-1_ppc64el.deb
+   curl -L https://developer.download.nvidia.com/compute/redist/nccl/v2.7/nccl_2.7.6-1+cuda11.0_ppc64le.txz -o $HOME/nccl_ppc64le.txz
+   ar vx $HOME/cuda-repo-ubuntu1804-11-0-local_11.0.2-450.51.05-1_ppc64el.deb
    tar xvf data.tar.xz
    mkdir $HOME/cudaFS
    cd var; find . -name *.deb | while read line; do ar vx $line; tar --totals -xf data.tar.xz -C $HOME/cudaFS; done

diff --git a/ci/install-travis.sh b/ci/install-travis.sh
@@ -31,9 +31,8 @@ export MAKEJ=2
 echo "export MAKEJ=2" | tee --append $HOME/vars.list
 
 # Try to use ccache to speed up the build
-curl -L https://github.com/ccache/ccache/releases/download/v3.7/ccache-3.7.tar.gz -o $HOME/ccache-3.7.tar.gz
-tar xvf $HOME/ccache-3.7.tar.gz -C $HOME
-patch -Np1 -d $HOME/ccache-3.7/ < $TRAVIS_BUILD_DIR/ci/ccache-cuda.patch
+curl -L https://github.com/ccache/ccache/releases/download/v3.7.10/ccache-3.7.10.tar.gz -o $HOME/ccache-3.7.10.tar.gz
+tar xvf $HOME/ccache-3.7.10.tar.gz -C $HOME
 export CCACHE_DIR=$HOME/.ccache
 export PATH=/usr/lib64/ccache/:/usr/lib/ccache/:$PATH
 echo "export CCACHE_DIR=$HOME/.ccache" | tee --append $HOME/vars.list
@@ -158,11 +157,11 @@ if [[ "$OS" == "linux-x86" ]] || [[ "$OS" == "linux-x86_64" ]] || [[ "$OS" =~ an
   fi
   if [[ "$PROJ" =~ cuda ]] || [[ "$PROJ" == "tensorrt" ]] || [[ "$EXT" =~ gpu ]]; then
         echo "installing cuda, cudnn, and nccl.."
-        curl -L http://developer.download.nvidia.com/compute/cuda/11.0.1/local_installers/cuda-repo-rhel7-11-0-local-11.0.1_450.36.06-1.x86_64.rpm -o $HOME/cuda-repo-rhel7-11-0-local-11.0.1_450.36.06-1.x86_64.rpm
+        curl -L http://developer.download.nvidia.com/compute/cuda/11.0.2/local_installers/cuda-repo-rhel7-11-0-local-11.0.2_450.51.05-1.x86_64.rpm -o $HOME/cuda-repo-rhel7-11-0-local-11.0.2_450.51.05-1.x86_64.rpm
         curl -L https://developer.download.nvidia.com/compute/redist/cudnn/v8.0.1/cudnn-11.0-linux-x64-v8.0.1.13.tgz -o $HOME/cudnn-11.0-linux-x64-v8.0.1.13.tgz
-        curl -L https://developer.download.nvidia.com/compute/redist/nccl/v2.7/nccl_2.7.3-1+cuda11.0_x86_64.txz -o $HOME/nccl_x86_64.txz
+        curl -L https://developer.download.nvidia.com/compute/redist/nccl/v2.7/nccl_2.7.6-1+cuda11.0_x86_64.txz -o $HOME/nccl_x86_64.txz
 
-        docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "rpm -i $HOME/cuda-repo-rhel7-11-0-local-11.0.1_450.36.06-1.x86_64.rpm"
+        docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "rpm -i $HOME/cuda-repo-rhel7-11-0-local-11.0.2_450.51.05-1.x86_64.rpm"
         docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "cd /var/cuda-repo-rhel7-11-0-local/; rpm -i --nodeps cuda*.rpm libc*.rpm libn*.rpm"
         docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "ln -sf /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/libcuda.so"
         docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "ln -sf /usr/local/cuda/lib64/stubs/libnvidia-ml.so /usr/local/cuda/lib64/libnvidia-ml.so"
@@ -177,7 +176,7 @@ if [[ "$OS" == "linux-x86" ]] || [[ "$OS" == "linux-x86_64" ]] || [[ "$OS" =~ an
         docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "cp /usr/local/cuda/lib64/stubs/libcuda.so /usr/lib64/libcuda.so; cp /usr/local/cuda/lib64/stubs/libcuda.so /usr/lib64/libcuda.so.1"
         docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "cp /usr/local/cuda/lib64/stubs/libnvidia-ml.so /usr/lib64/libnvidia-ml.so; cp /usr/local/cuda/lib64/stubs/libcuda.so /usr/lib64/libnvidia-ml.so.1"
 
-        docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "cd $HOME/ccache-3.7/; ./configure; make; make install"
+        docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "cd $HOME/ccache-3.7.10/; ./configure; make; make install"
         docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "echo 'CCACHE_CC=/usr/local/cuda/bin/nvcc /usr/local/bin/ccache compiler \"\$@\"' > /usr/local/cuda/bin/nvcccache"
         docker exec -ti $DOCKER_CONTAINER_ID /bin/bash -xec "chmod 755 /usr/local/cuda/bin/nvcccache"
   fi
@@ -335,7 +334,7 @@ if [ "$TRAVIS_OS_NAME" == "osx" ]; then
         # work around issues with CUDA 10.2
         for f in /usr/local/cuda/lib/*.10.dylib; do sudo ln -s $f ${f/%.10.dylib/.10.2.dylib}; done
 
-        cd $HOME/ccache-3.7/; ./configure; make; sudo make install; cd $TRAVIS_BUILD_DIR
+        cd $HOME/ccache-3.7.10/; ./configure; make; sudo make install; cd $TRAVIS_BUILD_DIR
         echo 'CCACHE_CC=/usr/local/cuda/bin/nvcc /usr/local/bin/ccache compiler "$@"' | sudo tee /usr/local/cuda/bin/nvcccache
         sudo chmod 755 /usr/local/cuda/bin/nvcccache
       fi

diff --git a/ci/install-windows.sh b/ci/install-windows.sh
@@ -117,9 +117,9 @@ fi
 
 if [ "$PROJ" == "cuda" ] || [ "$PROJ" == "tensorrt" ] || [ "$EXT" == "-gpu" ]; then
        echo Installing cuda 
-       curl -L -o cuda_11.0.1_451.22_win10.exe "http://developer.download.nvidia.com/compute/cuda/11.0.1/local_installers/cuda_11.0.1_451.22_win10.exe"
+       curl -L -o cuda_11.0.2_451.48_win10.exe "http://developer.download.nvidia.com/compute/cuda/11.0.2/local_installers/cuda_11.0.2_451.48_win10.exe"
        curl -L -o cudnn-11.0-windows-x64-v8.0.1.13.zip "https://developer.download.nvidia.com/compute/redist/cudnn/v8.0.1/cudnn-11.0-windows-x64-v8.0.1.13.zip"
-       ./cuda_11.0.1_451.22_win10.exe -s
+       ./cuda_11.0.2_451.48_win10.exe -s
        sleep 60
        unzip ./cudnn-11.0-windows-x64-v8.0.1.13.zip
        mv ./cuda/bin/*.dll /c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.0/bin

diff --git a/cuda/README.md b/cuda/README.md
@@ -19,9 +19,9 @@ Introduction
 ------------
 This directory contains the JavaCPP Presets module for:
 
- * CUDA 11.0  https://developer.nvidia.com/cuda-zone
+ * CUDA 11.0.2  https://developer.nvidia.com/cuda-zone
  * cuDNN 8.0.1  https://developer.nvidia.com/cudnn
- * NCCL 2.7.3  https://developer.nvidia.com/nccl
+ * NCCL 2.7.6  https://developer.nvidia.com/nccl
 
 Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.
 

diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cublas.java
@@ -101,9 +101,9 @@ public class cublas extends org.bytedeco.cuda.presets.cublas {
 // #endif /* __cplusplus */
 
 public static final int CUBLAS_VER_MAJOR = 11;
-public static final int CUBLAS_VER_MINOR = 0;
+public static final int CUBLAS_VER_MINOR = 1;
 public static final int CUBLAS_VER_PATCH = 0;
-public static final int CUBLAS_VER_BUILD = 191;
+public static final int CUBLAS_VER_BUILD = 229;
 public static final int CUBLAS_VERSION =  (CUBLAS_VER_MAJOR * 1000 + 
                          CUBLAS_VER_MINOR *  100 + 
                          CUBLAS_VER_PATCH);
@@ -146,7 +146,7 @@ public class cublas extends org.bytedeco.cuda.presets.cublas {
     CUBLAS_OP_T = 1,  
     CUBLAS_OP_C = 2,
     CUBLAS_OP_HERMITAN = 2, /* synonym of CUBLAS_OP_C */
-    CUBLAS_OP_CONJG = 3;     /* conjugate */
+    CUBLAS_OP_CONJG = 3;     /* conjugate, placeholder - not supported in the current release */
 
 
 /** enum cublasPointerMode_t */
@@ -7592,7 +7592,9 @@ public static native void cublasZtrmm(@Cast("char") byte side, @Cast("char") byt
     CUBLASLT_MATMUL_DESC_EPILOGUE = 7,
 
     /** Bias vector pointer in the device memory, see CUBLASLT_EPILOGUE_BIAS. Bias vector elements are the same type as
-     * alpha, beta (see CUBLASLT_MATMUL_DESC_SCALE_TYPE). Bias vector length must match matrix D rows count.
+     * the output elements (Ctype) with the exception of IMMA kernels with computeType=CUDA_R_32I and Ctype=CUDA_R_8I
+     * where the bias vector elements are the same type as alpha, beta (CUBLASLT_MATMUL_DESC_SCALE_TYPE=CUDA_R_32F).
+     * Bias vector length must match matrix D rows count.
      *
      * const void *, default: NULL
      */

diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java b/cuda/src/gen/java/org/bytedeco/cuda/global/cufft.java
@@ -91,16 +91,16 @@ public class cufft extends org.bytedeco.cuda.presets.cufft {
 // #endif
 
 public static final int CUFFT_VER_MAJOR = 10;
-public static final int CUFFT_VER_MINOR = 1;
-public static final int CUFFT_VER_PATCH = 3;
-public static final int CUFFT_VER_BUILD = 191;
+public static final int CUFFT_VER_MINOR = 2;
+public static final int CUFFT_VER_PATCH = 0;
+public static final int CUFFT_VER_BUILD = 218;
 
 // cuFFT library version
 //
 // CUFFT_VERSION / 1000 - major version
 // CUFFT_VERSION / 100 % 100 - minor version
 // CUFFT_VERSION % 100 - patch level
-public static final int CUFFT_VERSION = 10103;
+public static final int CUFFT_VERSION = 10200;
 
 // CUFFT API function return values
 /** enum cufftResult_t */

diff --git a/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java b/cuda/src/gen/java/org/bytedeco/cuda/global/curand.java
@@ -91,8 +91,8 @@ public class curand extends org.bytedeco.cuda.presets.curand {
 
 public static final int CURAND_VER_MAJOR = 10;
 public static final int CURAND_VER_MINOR = 2;
-public static final int CURAND_VER_PATCH = 0;
-public static final int CURAND_VER_BUILD = 191;
+public static final int CURAND_VER_PATCH = 1;
+public static final int CURAND_VER_BUILD = 218;
 public static final int CURAND_VERSION = (CURAND_VER_MAJOR * 1000 + 
                         CURAND_VER_MINOR *  100 + 
                         CURAND_VER_PATCH);