From 70307b82fecd4dab6b91e7d38934d090b340c87f Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Sun, 3 Nov 2024 20:41:33 +0900 Subject: [PATCH] * Upgrade presets for TensorRT 10.6.0.26, ONNX Runtime 1.20.0 --- .github/actions/deploy-ubuntu/action.yml | 8 +- .github/actions/deploy-windows/action.yml | 6 +- CHANGELOG.md | 2 +- README.md | 4 +- onnxruntime/README.md | 6 +- onnxruntime/cppbuild.sh | 6 +- onnxruntime/platform/gpu/pom.xml | 2 +- onnxruntime/platform/pom.xml | 2 +- onnxruntime/pom.xml | 2 +- onnxruntime/samples/pom.xml | 4 +- .../onnxruntime/BaseOrtLoraAdapter.java | 50 + .../org/bytedeco/onnxruntime/LoraAdapter.java | 57 + .../java/org/bytedeco/onnxruntime/OrtApi.java | 103 +- .../bytedeco/onnxruntime/OrtLoraAdapter.java | 23 + .../OrtMIGraphXProviderOptions.java | 1 + .../OrtOpenVINOProviderOptions.java | 1 - .../org/bytedeco/onnxruntime/RunOptions.java | 8 + .../org/bytedeco/onnxruntime/SessionImpl.java | 16 + .../onnxruntime/global/onnxruntime.java | 12 +- .../onnxruntime/presets/onnxruntime.java | 7 +- platform/pom.xml | 4 +- tensorrt/README.md | 6 +- tensorrt/platform/pom.xml | 2 +- tensorrt/platform/redist/pom.xml | 2 +- tensorrt/pom.xml | 2 +- tensorrt/samples/pom.xml | 4 +- .../org/bytedeco/tensorrt/global/nvinfer.java | 1762 +++++++++-------- .../bytedeco/tensorrt/nvinfer/IBuilder.java | 30 +- .../tensorrt/nvinfer/IBuilderConfig.java | 8 +- .../tensorrt/nvinfer/IDebugListener.java | 2 +- .../tensorrt/nvinfer/IExecutionContext.java | 12 +- .../tensorrt/nvinfer/IGpuAllocator.java | 5 +- .../bytedeco/tensorrt/nvinfer/ILogger.java | 5 +- .../tensorrt/nvinfer/IPluginCreator.java | 1 - .../nvinfer/IPluginCreatorInterface.java | 1 + .../tensorrt/nvinfer/IPluginCreatorV3One.java | 6 + .../nvinfer/IPluginResourceContext.java | 1 + .../bytedeco/tensorrt/nvinfer/IPluginV3.java | 1 + .../tensorrt/nvinfer/IPluginV3OneBuild.java | 3 + .../bytedeco/tensorrt/nvinfer/IRuntime.java | 2 + .../bytedeco/tensorrt/nvinfer/VBuilder.java | 4 +- 
.../bytedeco/tensorrt/presets/nvinfer.java | 7 +- tritonserver/samples/unsupported/pom.xml | 2 +- 43 files changed, 1283 insertions(+), 909 deletions(-) create mode 100644 onnxruntime/src/gen/java/org/bytedeco/onnxruntime/BaseOrtLoraAdapter.java create mode 100644 onnxruntime/src/gen/java/org/bytedeco/onnxruntime/LoraAdapter.java create mode 100644 onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtLoraAdapter.java diff --git a/.github/actions/deploy-ubuntu/action.yml b/.github/actions/deploy-ubuntu/action.yml index 791cc3c103e..402820aa92d 100644 --- a/.github/actions/deploy-ubuntu/action.yml +++ b/.github/actions/deploy-ubuntu/action.yml @@ -213,16 +213,16 @@ runs: if [[ "$CI_DEPLOY_PLATFORM" == "linux-arm64" ]] && [[ "$CI_DEPLOY_MODULE" == "tensorrt" ]]; then echo Installing TensorRT # python3 -m gdown 1LZRCv4ZAGiDQAu4pvADJIGntq4cGl5tU - curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz - $SUDO tar -hxvf TensorRT-10.5.0.18.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz -C /usr/local/ + curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz + $SUDO tar -hxvf TensorRT-10.6.0.26.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz -C /usr/local/ $SUDO ln -sf /usr/local/TensorRT* /usr/local/tensorrt fi if [[ "$CI_DEPLOY_PLATFORM" == "linux-x86_64" ]] && [[ "$CI_DEPLOY_MODULE" == "tensorrt" ]]; then echo Installing TensorRT # python3 -m gdown 1dVhD-DEYY42QbZe1GXl-vxe3k6KqWGsL - curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz - $SUDO tar -hxvf TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz -C /usr/local/ + curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz + $SUDO tar -hxvf 
TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz -C /usr/local/ $SUDO ln -sf /usr/local/TensorRT* /usr/local/tensorrt fi diff --git a/.github/actions/deploy-windows/action.yml b/.github/actions/deploy-windows/action.yml index 93ab34c928c..5c2064d9f0c 100644 --- a/.github/actions/deploy-windows/action.yml +++ b/.github/actions/deploy-windows/action.yml @@ -142,9 +142,9 @@ runs: if "%CI_DEPLOY_MODULE%"=="tensorrt" ( echo Installing TensorRT rem python -m gdown 1GfmJ1BKbacLpUU-0i_mGu0sjrAS0Xzzi - curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip - unzip TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip - move TensorRT-10.5.0.18 "%ProgramFiles%\NVIDIA GPU Computing Toolkit\TensorRT" + curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/zip/TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip + unzip TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip + move TensorRT-10.6.0.26 "%ProgramFiles%\NVIDIA GPU Computing Toolkit\TensorRT" ) if "%CI_DEPLOY_MODULE%"=="mkl" ( diff --git a/CHANGELOG.md b/CHANGELOG.md index e70e3dc4004..28b6710b011 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ * Build FFmpeg with zimg to enable zscale filter ([pull #1481](https://github.com/bytedeco/javacpp-presets/pull/1481)) * Enable PulseAudio support for FFmpeg on Linux ([pull #1472](https://github.com/bytedeco/javacpp-presets/pull/1472)) * Virtualize `btCollisionWorld`, `btOverlapFilterCallback`, `btOverlapCallback` from Bullet Physics SDK ([pull #1475](https://github.com/bytedeco/javacpp-presets/pull/1475)) - * Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2025.0, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.3, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 
1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.2, cuDNN 9.5.1, NCCL 2.23.4, nvCOMP 4.1.0.6, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.1 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.5.0.18, Triton Inference Server 2.51.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies + * Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2025.0, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.3, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.2, cuDNN 9.5.1, NCCL 2.23.4, nvCOMP 4.1.0.6, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.1 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.6.0.26, Triton Inference Server 2.51.0, ONNX 1.17.0, ONNX Runtime 1.20.0, TVM 0.18.0, and their dependencies ### January 29, 2024 version 1.5.10 * Introduce `macosx-arm64` builds for PyTorch ([pull #1463](https://github.com/bytedeco/javacpp-presets/pull/1463)) diff --git a/README.md b/README.md index f6cc649e5f9..018099bb495 100644 --- a/README.md +++ b/README.md @@ -227,13 +227,13 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip * SentencePiece 0.2.0 https://github.com/google/sentencepiece * TensorFlow 1.15.x https://github.com/tensorflow/tensorflow * TensorFlow Lite 2.18.x https://github.com/tensorflow/tensorflow - * TensorRT 10.5.x https://developer.nvidia.com/tensorrt + * TensorRT 10.6.x https://developer.nvidia.com/tensorrt * Triton Inference Server 2.51.x https://developer.nvidia.com/nvidia-triton-inference-server * The Arcade Learning Environment 0.8.x https://github.com/mgbellemare/Arcade-Learning-Environment 
* DepthAI 2.24.x https://github.com/luxonis/depthai-core * ONNX 1.17.x https://github.com/onnx/onnx * nGraph 0.26.0 https://github.com/NervanaSystems/ngraph - * ONNX Runtime 1.19.x https://github.com/microsoft/onnxruntime + * ONNX Runtime 1.20.x https://github.com/microsoft/onnxruntime * TVM 0.18.x https://github.com/apache/tvm * Bullet Physics SDK 3.25 https://pybullet.org * LiquidFun http://google.github.io/liquidfun/ diff --git a/onnxruntime/README.md b/onnxruntime/README.md index 816a796f310..a6eef34d7fa 100644 --- a/onnxruntime/README.md +++ b/onnxruntime/README.md @@ -9,7 +9,7 @@ Introduction ------------ This directory contains the JavaCPP Presets module for: - * ONNX Runtime 1.19.2 https://microsoft.github.io/onnxruntime/ + * ONNX Runtime 1.20.0 https://microsoft.github.io/onnxruntime/ Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. @@ -46,14 +46,14 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco onnxruntime-platform - 1.19.2-1.5.11-SNAPSHOT + 1.20.0-1.5.11-SNAPSHOT org.bytedeco onnxruntime-platform-gpu - 1.19.2-1.5.11-SNAPSHOT + 1.20.0-1.5.11-SNAPSHOT diff --git a/onnxruntime/cppbuild.sh b/onnxruntime/cppbuild.sh index 05a741b09c7..778cd4bb916 100755 --- a/onnxruntime/cppbuild.sh +++ b/onnxruntime/cppbuild.sh @@ -22,7 +22,7 @@ if [[ "$EXTENSION" == *gpu ]]; then GPU_FLAGS="--use_cuda" fi -ONNXRUNTIME=1.19.2 +ONNXRUNTIME=1.20.0 mkdir -p "$PLATFORM$EXTENSION" cd "$PLATFORM$EXTENSION" @@ -84,7 +84,7 @@ sedinplace 's/MLAS_CPUIDINFO::GetCPUIDInfo().HasArmNeon_I8MM()/false/g' onnxrunt # work around toolchain issues on Mac and Windows patch -p1 < ../../../onnxruntime.patch -patch -p1 < ../../../onnxruntime-cuda.patch # https://github.com/microsoft/onnxruntime/pull/22316 +#patch -p1 < ../../../onnxruntime-cuda.patch # https://github.com/microsoft/onnxruntime/pull/22316 #patch -p1 < ../../../onnxruntime-windows.patch # 
https://github.com/microsoft/onnxruntime/pull/7883 sedinplace '/--Werror/d' cmake/CMakeLists.txt sedinplace '/-DCMAKE_CUDA_COMPILER=/d' tools/ci_build/build.py @@ -113,6 +113,8 @@ sedinplace 's/, data_dims);/);/g' onnxruntime/core/providers/dnnl/subgraph/dnnl_ sedinplace 's/, dims);/);/g' onnxruntime/contrib_ops/cuda/quantization/qordered_ops/qordered_qdq.cc sedinplace '/omp_get_max_threads/d' onnxruntime/core/providers/dnnl/dnnl_execution_provider.cc sedinplace '/omp_set_num_threads/d' onnxruntime/core/providers/dnnl/dnnl_execution_provider.cc +sedinplace '/cvtfp16Avx/d' cmake/onnxruntime_mlas.cmake +sedinplace 's/MlasCastF16ToF32KernelAvx;/MlasCastF16ToF32KernelAvx2;/g' onnxruntime/core/mlas/lib/platform.cpp # use PTX instead of compiling for all CUDA archs to reduce library size sedinplace 's/-gencode=arch=compute_52,code=sm_52/-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_90,code=sm_90/g' cmake/CMakeLists.txt diff --git a/onnxruntime/platform/gpu/pom.xml b/onnxruntime/platform/gpu/pom.xml index 57bb72e76ba..ae582a58d04 100644 --- a/onnxruntime/platform/gpu/pom.xml +++ b/onnxruntime/platform/gpu/pom.xml @@ -12,7 +12,7 @@ org.bytedeco onnxruntime-platform-gpu - 1.19.2-${project.parent.version} + 1.20.0-${project.parent.version} JavaCPP Presets Platform GPU for ONNX Runtime diff --git a/onnxruntime/platform/pom.xml b/onnxruntime/platform/pom.xml index 8bd3e21ec95..b5fe2868b4b 100644 --- a/onnxruntime/platform/pom.xml +++ b/onnxruntime/platform/pom.xml @@ -12,7 +12,7 @@ org.bytedeco onnxruntime-platform - 1.19.2-${project.parent.version} + 1.20.0-${project.parent.version} JavaCPP Presets Platform for ONNX Runtime diff --git a/onnxruntime/pom.xml b/onnxruntime/pom.xml index 402f2bcf863..77e1badcfd4 100644 --- a/onnxruntime/pom.xml +++ b/onnxruntime/pom.xml @@ -11,7 +11,7 @@ org.bytedeco onnxruntime - 1.19.2-${project.parent.version} + 
1.20.0-${project.parent.version} JavaCPP Presets for ONNX Runtime diff --git a/onnxruntime/samples/pom.xml b/onnxruntime/samples/pom.xml index 06a60167c4d..08c0c2c5ca4 100644 --- a/onnxruntime/samples/pom.xml +++ b/onnxruntime/samples/pom.xml @@ -12,14 +12,14 @@ org.bytedeco onnxruntime-platform - 1.19.2-1.5.11-SNAPSHOT + 1.20.0-1.5.11-SNAPSHOT org.bytedeco onnxruntime-platform-gpu - 1.19.2-1.5.11-SNAPSHOT + 1.20.0-1.5.11-SNAPSHOT diff --git a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/BaseOrtLoraAdapter.java b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/BaseOrtLoraAdapter.java new file mode 100644 index 00000000000..4685a956a7f --- /dev/null +++ b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/BaseOrtLoraAdapter.java @@ -0,0 +1,50 @@ +// Targeted by JavaCPP version 1.5.11-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.onnxruntime; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.opencl.*; +import static org.bytedeco.opencl.global.OpenCL.*; +import org.bytedeco.dnnl.*; +import static org.bytedeco.dnnl.global.dnnl.*; + +import static org.bytedeco.onnxruntime.global.onnxruntime.*; + +@Name("Ort::detail::Base") @NoOffset @Properties(inherit = org.bytedeco.onnxruntime.presets.onnxruntime.class) +public class BaseOrtLoraAdapter extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public BaseOrtLoraAdapter(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ + public BaseOrtLoraAdapter(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public BaseOrtLoraAdapter position(long position) { + return (BaseOrtLoraAdapter)super.position(position); + } + @Override public BaseOrtLoraAdapter getPointer(long i) { + return new BaseOrtLoraAdapter((Pointer)this).offsetAddress(i); + } + + + public BaseOrtLoraAdapter() { super((Pointer)null); allocate(); } + private native void allocate(); + public BaseOrtLoraAdapter(@Cast("Ort::detail::Base::contained_type*") OrtLoraAdapter p) { super((Pointer)null); allocate(p); } + @NoException(true) private native void allocate(@Cast("Ort::detail::Base::contained_type*") OrtLoraAdapter p); + + + + + public BaseOrtLoraAdapter(@ByRef(true) BaseOrtLoraAdapter v) { super((Pointer)null); allocate(v); } + @NoException(true) private native void allocate(@ByRef(true) BaseOrtLoraAdapter v); + public native @ByRef @Name("operator =") @NoException(true) BaseOrtLoraAdapter put(@ByRef(true) BaseOrtLoraAdapter v); + + public native @Cast("Ort::detail::Base::contained_type*") @Name("operator Ort::detail::Base::contained_type*") @NoException(true) OrtLoraAdapter asOrtLoraAdapter(); + + /** \brief Relinquishes ownership of the contained C object pointer + * The underlying object is not destroyed */ + public native @Cast("Ort::detail::Base::contained_type*") OrtLoraAdapter release(); +} diff --git a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/LoraAdapter.java b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/LoraAdapter.java new file mode 100644 index 00000000000..22d6f80affe --- /dev/null +++ b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/LoraAdapter.java @@ -0,0 +1,57 @@ +// Targeted by JavaCPP version 1.5.11-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.onnxruntime; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; 
+import org.bytedeco.opencl.*; +import static org.bytedeco.opencl.global.OpenCL.*; +import org.bytedeco.dnnl.*; +import static org.bytedeco.dnnl.global.dnnl.*; + +import static org.bytedeco.onnxruntime.global.onnxruntime.*; + + +/** \brief LoraAdapter holds a set of Lora Parameters loaded from a single file */ +@Namespace("Ort") @Properties(inherit = org.bytedeco.onnxruntime.presets.onnxruntime.class) +public class LoraAdapter extends BaseOrtLoraAdapter { + static { Loader.load(); } + /** Default native constructor. */ + public LoraAdapter() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public LoraAdapter(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public LoraAdapter(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public LoraAdapter position(long position) { + return (LoraAdapter)super.position(position); + } + @Override public LoraAdapter getPointer(long i) { + return new LoraAdapter((Pointer)this).offsetAddress(i); + } + + /** \brief Wraps OrtApi::CreateLoraAdapter + * + * The function attempts to load the adapter from the specified file + * @param adapter_path The path to the Lora adapter + * @param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still + * be copied to device if required by the model at inference time. */ + + /// + public static native @ByVal LoraAdapter CreateLoraAdapter(@Cast("const std::basic_string*") @ByRef Pointer adapter_path, + OrtAllocator allocator); + + /** \brief Wraps OrtApi::CreateLoraAdapterFromArray + * + * The function attempts to load the adapter from the specified byte array. 
+ * @param bytes The byte array containing file LoraAdapter format + * @param num_bytes The number of bytes in the byte array + * @param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still + * be copied to device if required by the model at inference time. */ + public static native @ByVal LoraAdapter CreateLoraAdapterFromArray(@Const Pointer bytes, @Cast("size_t") long num_bytes, + OrtAllocator allocator); +} diff --git a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtApi.java b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtApi.java index 6b10694a81a..43c2b039a0b 100644 --- a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtApi.java +++ b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtApi.java @@ -3529,10 +3529,17 @@ public native OrtStatus InvokeOp( * - "73" * - "75" * "device_id": The ID of the device to use when setting 'htp_arch'. Defaults to "0" (for single device). - "enable_htp_fp16_precision": Only used for float32 model. - Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision. - - "0": Default. With fp32 precision. - - "1": With fp16 precision. + * "enable_htp_fp16_precision": Used for float32 model for HTP backend. + * Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision. + * - "0": With fp32 precision. + * - "1": Default. With fp16 precision. + * "enable_htp_weight_sharing": Enable QNN weight sharing feature while compiling multiple graphs into one QNN context. + * - "0": Default. Disabled. + * - "1": Enabled. + * "offload_graph_io_quantization": Offload graph input quantization and graph output dequantization to another + * execution provider (typically CPU EP). + * - "0": Default. Disabled. QNN EP will handle quantization and dequantization of graph I/O. + * - "1": Enabled. 
* * SNPE supported keys: * "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16", @@ -3718,7 +3725,7 @@ public native OrtStatus GetCANNProviderOptionsAsString( @Const OrtCANNProviderOp /** \brief Release an OrtCANNProviderOptions * - * @param the [in] pointer of OrtCANNProviderOptions which will been deleted + * @param input [in] The pointer of OrtCANNProviderOptions which will been deleted * * @since Version 1.13. */ @@ -4836,4 +4843,90 @@ public native OrtStatus AddExternalInitializersFromFilesInMemory( OrtSessionOpti @Cast("char*const*") @ByPtrPtr byte[] external_initializer_file_buffer_array, @Cast("const size_t*") SizeTPointer external_initializer_file_lengths, @Cast("size_t") long num_external_initializer_files); + + /** \brief Create an OrtLoraAdapter + * + * The function attempts to locate file specified by adapter_file_path, read it and create an OrtLoraAdapter + * instance. The adapter_file_path should be a valid path to a file that contains a valid Lora Adapter + * format. The function attempts to validate the format at load time. The file will always be memory mapped, unless + * the platform does not support memory mapping, in which case the file will be read into memory. + * + * @param adapter_file_path [in] adapter file path. + * @param allocator [in] optional pointer to a device allocator. If specified + * data is copied to the device at some point before Run() is invoked. If nullptr, data stays on CPU. + * The data would still be copied to device if required by the model at inference time. + * @param out [out] A pointer to a newly created OrtLoraAdapter instance. Must be released with + * OrtApi::ReleaseLoraAdapter. 
+ * + * \snippet{doc} snippets.dox OrtStatus Return Value + */ + public native OrtStatus CreateLoraAdapter( @Cast("const ORTCHAR_T*") Pointer adapter_file_path, OrtAllocator allocator, + @Cast("OrtLoraAdapter**") PointerPointer out); + public native OrtStatus CreateLoraAdapter( @Cast("const ORTCHAR_T*") Pointer adapter_file_path, OrtAllocator allocator, + @ByPtrPtr OrtLoraAdapter out); + + /** \brief Create an OrtLoraAdapter + * + * The function copies the bytes from the array and creates an OrtLoraAdapter instance. + * + * + * @param bytes [in] pointer to a valid Lora Adapter format buffer. + * @param num_bytes [in] length of bytes buffer. + * @param allocator [in] optional pointer to a device allocator. If specified + * data is copied to the device at some point before Run() is invoked. If nullptr, data stays on CPU. + * The data would still be copied to device if required by the model at inference time. + * @param out [out] A pointer to a newly created OrtLoraAdapter instance. Must be released with + * OrtApi::ReleaseLoraAdapter. + * + * \snippet{doc} snippets.dox OrtStatus Return Value + */ + public native OrtStatus CreateLoraAdapterFromArray( @Const Pointer bytes, @Cast("size_t") long num_bytes, OrtAllocator allocator, + @Cast("OrtLoraAdapter**") PointerPointer out); + public native OrtStatus CreateLoraAdapterFromArray( @Const Pointer bytes, @Cast("size_t") long num_bytes, OrtAllocator allocator, + @ByPtrPtr OrtLoraAdapter out); + + /** \brief Release an ::OrtLoraAdapter obtained from OrtApi::CreateLoraAdapter + */ + public native void ReleaseLoraAdapter(OrtLoraAdapter input); + + /** \brief Add the Lora Adapter to the list of active adapters. + * + * The function adds the Lora Adapter to the list of active adapters. The Lora Adapter must be created with + * OrtApi::CreateLoraAdapter or FromArray. The Lora Adapter will be used by the session to run the model. + * The instance of the OrtRunOptions can then be used to customize the Run() calls. 
+ * More than one OrtLoraAdapter can be active at the same time. Lora Parameters that belong to different + * Lora adapters that will be active at the same time must not overlap. + * This setting does not affect RunWithBinding. + * + * @param options [in] OrtRunOptions instance + * @param adapter [in] OrtLoraAdapter instance + * + * \snippet{doc} snippets.dox OrtStatus Return Value + */ + public native OrtStatus RunOptionsAddActiveLoraAdapter( OrtRunOptions options, @Const OrtLoraAdapter adapter); + + /** \} + * \name OrtEpDynamicOptions + * \{ +

+ /** \brief Set DynamicOptions for EPs (Execution Providers) + * + * Valid options can be found in {@code include\onnxruntime\core\session\onnxruntime_session_options_config_keys.h} + * Look for {@code kOrtEpDynamicOptions} + * + * @param sess [in] OrtSession + * @param keys [in] Array of null terminated UTF8 encoded strings of EP dynamic option keys + * @param values [in] Array of null terminated UTF8 encoded string of EP dynamic option values + * @param kv_len [in] Number of elements in the keys and values arrays + * + * \snippet{doc} snippets.dox OrtStatus Return Value + */ + public native OrtStatus SetEpDynamicOptions( OrtSession sess, @Cast("const char*const*") PointerPointer keys, + @Cast("const char*const*") PointerPointer values, @Cast("size_t") long kv_len); + public native OrtStatus SetEpDynamicOptions( OrtSession sess, @Cast("const char*const*") @ByPtrPtr BytePointer keys, + @Cast("const char*const*") @ByPtrPtr BytePointer values, @Cast("size_t") long kv_len); + public native OrtStatus SetEpDynamicOptions( OrtSession sess, @Cast("const char*const*") @ByPtrPtr ByteBuffer keys, + @Cast("const char*const*") @ByPtrPtr ByteBuffer values, @Cast("size_t") long kv_len); + public native OrtStatus SetEpDynamicOptions( OrtSession sess, @Cast("const char*const*") @ByPtrPtr byte[] keys, + @Cast("const char*const*") @ByPtrPtr byte[] values, @Cast("size_t") long kv_len); } diff --git a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtLoraAdapter.java b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtLoraAdapter.java new file mode 100644 index 00000000000..814d4bc1b87 --- /dev/null +++ b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtLoraAdapter.java @@ -0,0 +1,23 @@ +// Targeted by JavaCPP version 1.5.11-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.onnxruntime; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.opencl.*; 
+import static org.bytedeco.opencl.global.OpenCL.*; +import org.bytedeco.dnnl.*; +import static org.bytedeco.dnnl.global.dnnl.*; + +import static org.bytedeco.onnxruntime.global.onnxruntime.*; + +@Opaque @Properties(inherit = org.bytedeco.onnxruntime.presets.onnxruntime.class) +public class OrtLoraAdapter extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public OrtLoraAdapter() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public OrtLoraAdapter(Pointer p) { super(p); } +} diff --git a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtMIGraphXProviderOptions.java b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtMIGraphXProviderOptions.java index 4a75c6b211d..d63068551d5 100644 --- a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtMIGraphXProviderOptions.java +++ b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtMIGraphXProviderOptions.java @@ -46,4 +46,5 @@ public class OrtMIGraphXProviderOptions extends Pointer { public native @Cast("const char*") BytePointer migraphx_save_model_path(); public native OrtMIGraphXProviderOptions migraphx_save_model_path(BytePointer setter); // migraphx model path name public native int migraphx_load_compiled_model(); public native OrtMIGraphXProviderOptions migraphx_load_compiled_model(int setter); // migraphx int8 cal table. 
Default 0 = false, noznero = true public native @Cast("const char*") BytePointer migraphx_load_model_path(); public native OrtMIGraphXProviderOptions migraphx_load_model_path(BytePointer setter); // migraphx model path name + public native @Cast("bool") boolean migraphx_exhaustive_tune(); public native OrtMIGraphXProviderOptions migraphx_exhaustive_tune(boolean setter); // migraphx tuned compile Default = false } diff --git a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtOpenVINOProviderOptions.java b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtOpenVINOProviderOptions.java index 433c1da9492..ec48b1a88df 100644 --- a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtOpenVINOProviderOptions.java +++ b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/OrtOpenVINOProviderOptions.java @@ -43,7 +43,6 @@ public class OrtOpenVINOProviderOptions extends Pointer { * Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16" */ public native @Cast("const char*") BytePointer device_type(); public native OrtOpenVINOProviderOptions device_type(BytePointer setter); - /** 0 = disabled, nonzero = enabled */ public native @Cast("unsigned char") byte enable_npu_fast_compile(); public native OrtOpenVINOProviderOptions enable_npu_fast_compile(byte setter); public native @Cast("const char*") BytePointer device_id(); public native OrtOpenVINOProviderOptions device_id(BytePointer setter); /** 0 = Use default number of threads */ diff --git a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/RunOptions.java b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/RunOptions.java index f4f08350d1c..6e7add02d2d 100644 --- a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/RunOptions.java +++ b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/RunOptions.java @@ -68,4 +68,12 @@ public class RunOptions extends BaseRunOptions { * Wraps OrtApi::RunOptionsUnsetTerminate */ public native @ByRef RunOptions UnsetTerminate(); + + /** \brief Add the LoraAdapter to the 
list of active adapters. + * The setting does not affect RunWithBinding() calls. + * + * Wraps OrtApi::RunOptionsAddActiveLoraAdapter + * @param adapter The LoraAdapter to be used as the active adapter + */ + public native @ByRef RunOptions AddActiveLoraAdapter(@Const @ByRef LoraAdapter adapter); } diff --git a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/SessionImpl.java b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/SessionImpl.java index 37ecc69d026..9421b63b82e 100644 --- a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/SessionImpl.java +++ b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/SessionImpl.java @@ -111,4 +111,20 @@ public native void RunAsync(@Const @ByRef RunOptions run_options, @Cast("const c */ /** Wraps OrtApi::SessionEndProfiling */ public native @UniquePtr("char, Ort::detail::AllocatedFree") @Cast("char*") BytePointer EndProfilingAllocated(OrtAllocator allocator); + + /** \brief Set DynamicOptions for EPs (Execution Providers) + * + * Wraps OrtApi::SetEpDynamicOptions + * + * Valid options can be found in {@code include\onnxruntime\core\session\onnxruntime_session_options_config_keys.h} + * Look for {@code kOrtEpDynamicOptions} + * + * @param keys [in] Array of null terminated UTF8 encoded strings of EP dynamic option keys + * @param values [in] Array of null terminated UTF8 encoded string of EP dynamic option values + * @param kv_len [in] Number of elements in the keys and values arrays + */ + public native void SetEpDynamicOptions(@Cast("const char*const*") PointerPointer keys, @Cast("const char*const*") PointerPointer values, @Cast("size_t") long kv_len); + public native void SetEpDynamicOptions(@Cast("const char*const*") @ByPtrPtr BytePointer keys, @Cast("const char*const*") @ByPtrPtr BytePointer values, @Cast("size_t") long kv_len); + public native void SetEpDynamicOptions(@Cast("const char*const*") @ByPtrPtr ByteBuffer keys, @Cast("const char*const*") @ByPtrPtr ByteBuffer values, @Cast("size_t") long kv_len); + public 
native void SetEpDynamicOptions(@Cast("const char*const*") @ByPtrPtr byte[] keys, @Cast("const char*const*") @ByPtrPtr byte[] values, @Cast("size_t") long kv_len); } diff --git a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/global/onnxruntime.java b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/global/onnxruntime.java index 90c725945d4..4fa658652fa 100644 --- a/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/global/onnxruntime.java +++ b/onnxruntime/src/gen/java/org/bytedeco/onnxruntime/global/onnxruntime.java @@ -74,7 +74,7 @@ public class onnxruntime extends org.bytedeco.onnxruntime.presets.onnxruntime { * * This value is used by some API functions to behave as this version of the header expects. */ -public static final int ORT_API_VERSION = 19; +public static final int ORT_API_VERSION = 20; // #ifdef __cplusplus // #endif @@ -395,6 +395,9 @@ public enum OrtSparseIndicesFormat { // Targeting ../OrtShapeInferContext.java +// Targeting ../OrtLoraAdapter.java + + // #ifdef _WIN32 // #else @@ -736,6 +739,7 @@ public enum OrtSparseIndicesFormat { @Namespace("Ort::detail") public static native void OrtRelease(OrtThreadingOptions ptr); @Namespace("Ort::detail") public static native void OrtRelease(OrtEnv ptr); @Namespace("Ort::detail") public static native void OrtRelease(OrtRunOptions ptr); +@Namespace("Ort::detail") public static native void OrtRelease(OrtLoraAdapter ptr); @Namespace("Ort::detail") public static native void OrtRelease(OrtSession ptr); @Namespace("Ort::detail") public static native void OrtRelease(OrtSessionOptions ptr); @Namespace("Ort::detail") public static native void OrtRelease(OrtTensorTypeAndShapeInfo ptr); @@ -848,6 +852,9 @@ public enum OrtSparseIndicesFormat { // Targeting ../BaseThreadingOptions.java +// Targeting ../BaseOrtLoraAdapter.java + + // Undefined. 
For const types use Base> @@ -880,6 +887,9 @@ public enum OrtSparseIndicesFormat { // Targeting ../CustomOpDomain.java +// Targeting ../LoraAdapter.java + + // Targeting ../RunOptions.java diff --git a/onnxruntime/src/main/java/org/bytedeco/onnxruntime/presets/onnxruntime.java b/onnxruntime/src/main/java/org/bytedeco/onnxruntime/presets/onnxruntime.java index 963b4a82ea7..7e04f0738f5 100644 --- a/onnxruntime/src/main/java/org/bytedeco/onnxruntime/presets/onnxruntime.java +++ b/onnxruntime/src/main/java/org/bytedeco/onnxruntime/presets/onnxruntime.java @@ -64,16 +64,16 @@ // "onnxruntime/core/providers/rocm/rocm_provider_factory.h", // "onnxruntime/core/providers/dml/dml_provider_factory.h", }, - link = {"onnxruntime_providers_shared", "onnxruntime@.1.19.2"} + link = {"onnxruntime_providers_shared", "onnxruntime@.1"} ), @Platform( value = {"linux-x86_64", "macosx-x86_64", "windows-x86_64"}, - link = {"onnxruntime_providers_shared", "onnxruntime@.1.19.2", "onnxruntime_providers_dnnl"} + link = {"onnxruntime_providers_shared", "onnxruntime@.1", "onnxruntime_providers_dnnl"} ), @Platform( value = {"linux-x86_64", "macosx-x86_64", "windows-x86_64"}, extension = "-gpu", - link = {"onnxruntime_providers_shared", "onnxruntime@.1.19.2", "onnxruntime_providers_dnnl", "onnxruntime_providers_cuda"} + link = {"onnxruntime_providers_shared", "onnxruntime@.1", "onnxruntime_providers_dnnl", "onnxruntime_providers_cuda"} ), }, target = "org.bytedeco.onnxruntime", @@ -223,6 +223,7 @@ public void map(InfoMap infoMap) { .put(new Info("Ort::detail::KernelInfoImpl").pointerTypes("KernelInfoImpl")) .put(new Info("Ort::detail::Base").pointerTypes("BaseKernelInfo")) .put(new Info("Ort::detail::Base").pointerTypes("BaseThreadingOptions")) + .put(new Info("Ort::detail::Base").pointerTypes("BaseOrtLoraAdapter")) .put(new Info("OrtSessionOptionsAppendExecutionProvider_MIGraphX", "OrtSessionOptionsAppendExecutionProvider_Tensorrt", "OrtSessionOptionsAppendExecutionProvider_ROCM", 
"Ort::detail::OptionalTypeInfoImpl::GetOptionalElementType").skip()) diff --git a/platform/pom.xml b/platform/pom.xml index 80fcbe436f7..81cbbf2f0f2 100644 --- a/platform/pom.xml +++ b/platform/pom.xml @@ -312,7 +312,7 @@ org.bytedeco tensorrt-platform - 10.5-${project.version} + 10.6-${project.version} org.bytedeco @@ -342,7 +342,7 @@ org.bytedeco onnxruntime-platform - 1.19.2-${project.version} + 1.20.0-${project.version} org.bytedeco diff --git a/tensorrt/README.md b/tensorrt/README.md index f255089e232..a00cbbc6e95 100644 --- a/tensorrt/README.md +++ b/tensorrt/README.md @@ -17,7 +17,7 @@ Introduction ------------ This directory contains the JavaCPP Presets module for: - * TensorRT 10.5.0.18 https://developer.nvidia.com/tensorrt + * TensorRT 10.6.0.26 https://developer.nvidia.com/tensorrt Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. @@ -54,7 +54,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco tensorrt-platform - 10.5-1.5.11-SNAPSHOT + 10.6-1.5.11-SNAPSHOT @@ -66,7 +66,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic org.bytedeco tensorrt-platform-redist - 10.5-1.5.11-SNAPSHOT + 10.6-1.5.11-SNAPSHOT diff --git a/tensorrt/platform/pom.xml b/tensorrt/platform/pom.xml index afc6e0dc55d..b397aec501b 100644 --- a/tensorrt/platform/pom.xml +++ b/tensorrt/platform/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tensorrt-platform - 10.5-${project.parent.version} + 10.6-${project.parent.version} JavaCPP Presets Platform for TensorRT diff --git a/tensorrt/platform/redist/pom.xml b/tensorrt/platform/redist/pom.xml index cabae40357c..65a97827b7a 100644 --- a/tensorrt/platform/redist/pom.xml +++ b/tensorrt/platform/redist/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tensorrt-platform-redist - 10.5-${project.parent.version} + 10.6-${project.parent.version} JavaCPP Presets Platform Redist for TensorRT diff --git a/tensorrt/pom.xml b/tensorrt/pom.xml 
index da69643f07f..abf45c002c3 100644 --- a/tensorrt/pom.xml +++ b/tensorrt/pom.xml @@ -11,7 +11,7 @@ org.bytedeco tensorrt - 10.5-${project.parent.version} + 10.6-${project.parent.version} JavaCPP Presets for TensorRT diff --git a/tensorrt/samples/pom.xml b/tensorrt/samples/pom.xml index f7094fb21a9..8a47270598d 100644 --- a/tensorrt/samples/pom.xml +++ b/tensorrt/samples/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tensorrt-platform - 10.5-1.5.11-SNAPSHOT + 10.6-1.5.11-SNAPSHOT @@ -24,7 +24,7 @@ org.bytedeco tensorrt-platform-redist - 10.5-1.5.11-SNAPSHOT + 10.6-1.5.11-SNAPSHOT diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java index 874a32baa80..bf2ddef849c 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/global/nvinfer.java @@ -51,11 +51,11 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { /** TensorRT major version. */ public static final int NV_TENSORRT_MAJOR = 10; /** TensorRT minor version. */ -public static final int NV_TENSORRT_MINOR = 5; +public static final int NV_TENSORRT_MINOR = 6; /** TensorRT patch version. */ public static final int NV_TENSORRT_PATCH = 0; /** TensorRT build number. */ -public static final int NV_TENSORRT_BUILD = 18; +public static final int NV_TENSORRT_BUILD = 26; /** TensorRT LWS major version. */ public static final int NV_TENSORRT_LWS_MAJOR = 0; @@ -152,11 +152,9 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { * * \warning Do not directly include this file. 
Instead include one of: * * NvInferRuntime.h (for the standard runtime) - * * NvInferSafeRuntime.h (for the safety runtime) - * * NvInferConsistency.h (for consistency checker) * * NvInferPluginUtils.h (for plugin utilities) * */ -// #if !defined(NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE) +// #if !defined(NV_INFER_INTERNAL_INCLUDE) // #endif /** Forward declare some CUDA types to avoid an include dependency. */ @@ -263,187 +261,6 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { * */ -//! -//! -//! -//! -//! -//! -//! -//! -//! -//! - -/** - * \enum TensorFormat - * - * \brief Format of the input/output tensors. - * - * This enum is used by both plugins and network I/O tensors. - * - * @see IPluginV2::supportsFormat(), safe::ICudaEngine::getBindingFormat() - * - * Many of the formats are **vector-major** or **vector-minor**. These formats specify - * a vector dimension and scalars per vector. - * For example, suppose that the tensor has has dimensions [M,N,C,H,W], - * the vector dimension is C and there are V scalars per vector. - * - * * A **vector-major** format splits the vectorized dimension into two axes in the - * memory layout. The vectorized dimension is replaced by an axis of length ceil(C/V) - * and a new dimension of length V is appended. For the example tensor, the memory layout - * is equivalent to an array with dimensions [M][N][ceil(C/V)][H][W][V]. - * Tensor coordinate (m,n,c,h,w) maps to array location [m][n][c/V][h][w][c\%V]. - * - * * A **vector-minor** format moves the vectorized dimension to become the last axis - * in the memory layout. For the example tensor, the memory layout is equivalent to an - * array with dimensions [M][N][H][W][ceil(C/V)*V]. Tensor coordinate (m,n,c,h,w) maps - * array location subscript [m][n][h][w][c]. - * - * In interfaces that refer to "components per element", that's the value of V above. 
- * - * For more information about data formats, see the topic "Data Format Description" located in the - * TensorRT Developer Guide. https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#data-format-desc - * */ -@Namespace("nvinfer1") public enum TensorFormat { - /** Memory layout is similar to an array in C or C++. - * The stride of each dimension is the product of the dimensions after it. - * The last dimension has unit stride. - * - * For DLA usage, the tensor sizes are limited to C,H,W in the range [1,8192]. */ - -//! - kLINEAR(0), - - /** Vector-major format with two scalars per vector. - * Vector dimension is third to last. - * - * This format requires FP16 or BF16 and at least three dimensions. */ - kCHW2(1), - - /** Vector-minor format with eight scalars per vector. - * Vector dimension is third to last. - * This format requires FP16 or BF16 and at least three dimensions. */ - -//! -//! -//! - kHWC8(2), - - /** Vector-major format with four scalars per vector. - * Vector dimension is third to last. - * - * This format requires INT8 or FP16 and at least three dimensions. - * For INT8, the length of the vector dimension must be a build-time constant. - * - * Deprecated usage: - * - * If running on the DLA, this format can be used for acceleration - * with the caveat that C must be less than or equal to 4. - * If used as DLA input and the build option kGPU_FALLBACK is not specified, - * it needs to meet line stride requirement of DLA format. Column stride in - * bytes must be a multiple of 64 on Orin. */ - -//! -//! - kCHW4(3), - - /** Vector-major format with 16 scalars per vector. - * Vector dimension is third to last. - * - * This format requires INT8 or FP16 and at least three dimensions. - * - * For DLA usage, this format maps to the native feature format for FP16, - * and the tensor sizes are limited to C,H,W in the range [1,8192]. */ - -//! -//! - kCHW16(4), - - /** Vector-major format with 32 scalars per vector. 
- * Vector dimension is third to last. - * - * This format requires at least three dimensions. - * - * For DLA usage, this format maps to the native feature format for INT8, - * and the tensor sizes are limited to C,H,W in the range [1,8192]. */ - -//! - kCHW32(5), - - /** Vector-minor format with eight scalars per vector. - * Vector dimension is fourth to last. - * - * This format requires FP16 or BF16 and at least four dimensions. */ - -//! - kDHWC8(6), - - /** Vector-major format with 32 scalars per vector. - * Vector dimension is fourth to last. - * - * This format requires FP16 or INT8 and at least four dimensions. */ - -//! - kCDHW32(7), - - /** Vector-minor format where channel dimension is third to last and unpadded. - * - * This format requires either FP32 or UINT8 and at least three dimensions. */ - -//! - kHWC(8), - - /** DLA planar format. For a tensor with dimension {N, C, H, W}, the W axis - * always has unit stride. The stride for stepping along the H axis is - * rounded up to 64 bytes. - * - * The memory layout is equivalent to a C array with dimensions - * [N][C][H][roundUp(W, 64/elementSize)] where elementSize is - * 2 for FP16 and 1 for Int8, with the tensor coordinates (n, c, h, w) - * mapping to array subscript [n][c][h][w]. */ - -//! - kDLA_LINEAR(9), - - /** DLA image format. For a tensor with dimension {N, C, H, W} the C axis - * always has unit stride. The stride for stepping along the H axis is rounded up - * to 64 bytes on Orin. C can only be 1, 3 or 4. - * If C == 1, it will map to grayscale format. - * If C == 3 or C == 4, it will map to color image format. And if C == 3, - * the stride for stepping along the W axis needs to be padded to 4 in elements. - * - * When C is {1, 3, 4}, then C' is {1, 4, 4} respectively, - * the memory layout is equivalent to a C array with dimensions - * [N][H][roundUp(W, 64/C'/elementSize)][C'] on Orin - * where elementSize is 2 for FP16 - * and 1 for Int8. 
The tensor coordinates (n, c, h, w) mapping to array - * subscript [n][h][w][c]. */ - -//! - kDLA_HWC4(10), - - /** Vector-minor format with 16 scalars per vector. - * Vector dimension is third to last. - * - * This requires FP16 and at least three dimensions. */ - -//! - kHWC16(11), - - /** Vector-minor format with one scalar per vector. - * Vector dimension is fourth to last. - * - * This format requires FP32 and at least four dimensions. */ - kDHWC(12); - - public final int value; - private TensorFormat(int v) { this.value = v; } - private TensorFormat(TensorFormat e) { this.value = e.value; } - public TensorFormat intern() { for (TensorFormat e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - - //! //! @@ -472,65 +289,6 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { // Targeting ../nvinfer/IVersionedInterface.java -/** Maximum number of elements in TensorFormat enum. @see TensorFormat */ - // namespace impl - - -/** - * \enum AllocatorFlag - * - * \brief Allowed type of memory allocation. - * */ -@Namespace("nvinfer1") public enum AllocatorFlag { - /** TensorRT may call realloc() on this allocation. */ - kRESIZABLE(0); - - public final int value; - private AllocatorFlag(int v) { this.value = v; } - private AllocatorFlag(AllocatorFlag e) { this.value = e.value; } - public AllocatorFlag intern() { for (AllocatorFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in AllocatorFlag enum. @see AllocatorFlag */ - // namespace impl -// Targeting ../nvinfer/IGpuAllocator.java - - - - // namespace v_1_0 - -/** - * \class IGpuAllocator - * - * \brief Application-implemented class for controlling allocation on the GPU. - * - * \warning The lifetime of an IGpuAllocator object must exceed that of all objects that use it. 
- * - * This class is intended as a base class for allocators that implement synchronous allocation. - * If you want the benefits of asynchronous allocation, you can do either of: - * - * * Derive your class from IGpuAllocator and override all four of its virtual methods - * for allocation/deallocation, including the two deprecated methods. - * - * * Derive your class from IGpuAsyncAllocator and override its two pure virtual - * methods for allocation/deallocation. - * - * The latter style is preferred because it does not tie code to deprecated methods. - * - * @see IGpuAsyncAllocator. - * */ - - -//! -//! -//! -//! -//! -// Targeting ../nvinfer/ILogger.java - - -/** Maximum number of elements in ILogger::Severity enum. @see ILogger::Severity */ - // namespace impl /** * \enum ErrorCode @@ -733,47 +491,73 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { public TensorIOMode intern() { for (TensorIOMode e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } -// Targeting ../nvinfer/IStreamReader.java - - - // namespace v_1_0 +/** Maximum number of elements in TensorIOMode enum. @see TensorIOMode */ + // namespace impl + // namespace nvinfer1 /** - * \class IStreamReader - * - * \brief Application-implemented class for reading data in a stream-based manner. + * \brief Return the library version number. * - * \note To ensure compatibility of source code with future versions of TensorRT, use IStreamReader, not - * v_1_0::IStreamReader + * The format is as for TENSORRT_VERSION: (MAJOR * 100 + MINOR) * 100 + PATCH * */ -// Targeting ../nvinfer/IPluginResource.java +public static native @NoException(true) int getInferLibVersion(); - // class IPluginResource - // namespace v_1_0 +// #endif // NV_INFER_RUNTIME_BASE_H + + +// Parsed from NvInferRuntimeCommon.h + +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// #ifndef NV_INFER_RUNTIME_COMMON_H + + +//! +//! +//! +//! +//! +// #define NV_INFER_RUNTIME_COMMON_H /** - * \class IPluginResource + * \file NvInferRuntimeCommon.h * - * \brief Interface for plugins to define custom resources that could be shared through the plugin registry + * This file provides the nvinfer1::IPluginRegistry interface, which will be moved to the NvInferRuntime.h header + * in a future release. * - * @see IPluginRegistry::acquirePluginResource - * @see IPluginRegistry::releasePluginResource - * */ -/** Maximum number of elements in TensorIOMode enum. @see TensorIOMode */ - // namespace impl - // namespace nvinfer1 - -/** - * \brief Return the library version number. + * \warning This file will be removed in a future release. * - * The format is as for TENSORRT_VERSION: (MAJOR * 100 + MINOR) * 100 + PATCH + * \warning Do not directly include this file. 
Instead include NvInferRuntime.h * */ -public static native @NoException(true) int getInferLibVersion(); +public static final int NV_INFER_INTERNAL_INCLUDE = 1; +// #include "NvInferPluginBase.h" +// #undef NV_INFER_INTERNAL_INCLUDE +// #include "NvInferRuntimePlugin.h" +// Targeting ../nvinfer/IPluginRegistry.java -// #endif // NV_INFER_RUNTIME_BASE_H -// Parsed from NvInferRuntimePlugin.h + // namespace nvinfer1 + +// #endif /* NV_INFER_RUNTIME_COMMON_H */ + + +// Parsed from NvInferLegacyDims.h /* * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. @@ -792,31 +576,25 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { * limitations under the License. */ -// #ifndef NV_INFER_RUNTIME_PLUGIN_H -// #define NV_INFER_RUNTIME_PLUGIN_H - -public static final int NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE = 1; +// #ifndef NV_INFER_LEGACY_DIMS_H +// #define NV_INFER_LEGACY_DIMS_H // #include "NvInferRuntimeBase.h" //! //! //! -//! //! //! //! -// #undef NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE +// #undef NV_INFER_INTERNAL_INCLUDE /** - * \file NvInferRuntimePlugin.h - * - * This file contains common definitions, data structures and interfaces that relate to plugins and are shared - * between the standard and safe runtime. + * \file NvInferLegacyDims.h * - * \warning Do not directly include this file. Instead include either NvInferRuntime.h (for the standard runtime) or - * NvInferSafeRuntime.h (for the safety runtime). + * This file contains declarations of legacy dimensions types which use channel + * semantics in their names, and declarations on which those types rely. *

* @@ -824,730 +602,648 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { * * \brief The TensorRT API version 1 namespace. * */ +// Targeting ../nvinfer/Dims2.java -/** - * \brief PluginFormat is reserved for backward compatibility. - * - * @see IPluginV2::supportsFormat() - * */ +// Targeting ../nvinfer/DimsHW.java -//! -//! -/** - * \brief Bit at the plugin version to identify that it is a plugin. - * */ +// Targeting ../nvinfer/Dims3.java -//! -//! -//! -//! -//! -@Namespace("nvinfer1") @MemberGetter public static native int kPLUGIN_VERSION_PYTHON_BIT(); -public static final int kPLUGIN_VERSION_PYTHON_BIT = kPLUGIN_VERSION_PYTHON_BIT(); -// Targeting ../nvinfer/PluginTensorDesc.java +// Targeting ../nvinfer/Dims4.java -/** - * \struct PluginVersion - * - * \brief Definition of plugin versions. - * - * Tag for plug-in versions. Used in upper byte of getTensorRTVersion(). - * */ -@Namespace("nvinfer1") public enum PluginVersion { - /** IPluginV2 */ - kV2((byte)(0)), - /** IPluginV2Ext */ - kV2_EXT((byte)(1)), - /** IPluginV2IOExt */ - kV2_IOEXT((byte)(2)), - /** IPluginV2DynamicExt */ - kV2_DYNAMICEXT((byte)(3)), - /** IPluginV2DynamicExt-based Python plugins */ - kV2_DYNAMICEXT_PYTHON((byte)(kPLUGIN_VERSION_PYTHON_BIT | 3)); + // namespace nvinfer1 - public final byte value; - private PluginVersion(byte v) { this.value = v; } - private PluginVersion(PluginVersion e) { this.value = e.value; } - public PluginVersion intern() { for (PluginVersion e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +// #endif // NV_INFER_LEGCY_DIMS_H + + +// Parsed from NvInferRuntime.h + +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// #ifndef NV_INFER_RUNTIME_H + + +//! +//! +//! +// #define NV_INFER_RUNTIME_H /** - * \enum PluginCreatorVersion + * \file NvInferRuntime.h * - * \brief Enum to identify version of the plugin creator. + * This is the top-level API file for TensorRT extended runtime library. * */ -@Namespace("nvinfer1") public enum PluginCreatorVersion { - /** IPluginCreator */ - kV1(0), - /** IPluginCreator-based Python plugin creators */ - kV1_PYTHON(kPLUGIN_VERSION_PYTHON_BIT); - - public final int value; - private PluginCreatorVersion(int v) { this.value = v; } - private PluginCreatorVersion(PluginCreatorVersion e) { this.value = e.value; } - public PluginCreatorVersion intern() { for (PluginCreatorVersion e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -// Targeting ../nvinfer/IPluginV2.java - -// Targeting ../nvinfer/IPluginV2Ext.java +// #include "NvInferImpl.h" +// #include "NvInferPluginBase.h" +// #undef NV_INFER_INTERNAL_INCLUDE +// #include "NvInferRuntimeCommon.h" +// Targeting ../nvinfer/IPluginFactory.java -// Targeting ../nvinfer/IPluginV2IOExt.java +// Targeting ../nvinfer/INoCopy.java /** - * \enum PluginFieldType + * \enum EngineCapability * - * \brief The possible field types for custom layer. + * \brief List of supported engine capability flows. + * + * \details The EngineCapability determines the restrictions of a network during build time and what runtime + * it targets. 
When BuilderFlag::kSAFETY_SCOPE is not set (by default), EngineCapability::kSTANDARD does not provide + * any restrictions on functionality and the resulting serialized engine can be executed with TensorRT's standard + * runtime APIs in the nvinfer1 namespace. EngineCapability::kSAFETY provides a restricted subset of network + * operations that are safety certified and the resulting serialized engine can be executed with TensorRT's safe + * runtime APIs in the nvinfer1::safe namespace. EngineCapability::kDLA_STANDALONE provides a restricted subset of + * network operations that are DLA compatible and the resulting serialized engine can be executed using standalone + * DLA runtime APIs. See sampleCudla for an example of integrating cuDLA APIs with TensorRT APIs. * */ -@Namespace("nvinfer1") public enum PluginFieldType { - /** FP16 field type. */ - kFLOAT16(0), - /** FP32 field type. */ - kFLOAT32(1), - /** FP64 field type. */ - kFLOAT64(2), - /** INT8 field type. */ - kINT8(3), - /** INT16 field type. */ - kINT16(4), - /** INT32 field type. */ - kINT32(5), - /** char field type. */ - kCHAR(6), - /** nvinfer1::Dims field type. */ - kDIMS(7), - /** Unknown field type. */ - kUNKNOWN(8), - /** BF16 field type. */ - kBF16(9), - /** INT64 field type. */ - kINT64(10), - /** FP8 field type. */ - kFP8(11), - /** INT4 field type. */ - kINT4(12); +@Namespace("nvinfer1") public enum EngineCapability { + /** + * Standard: TensorRT flow without targeting the safety runtime. + * This flow supports both DeviceType::kGPU and DeviceType::kDLA. + * */ + + +//! +//! + kSTANDARD(0), + + /** + * Safety: TensorRT flow with restrictions targeting the safety runtime. + * See safety documentation for list of supported layers and formats. + * This flow supports only DeviceType::kGPU. + * + * This flag is only supported in NVIDIA Drive(R) products. */ + + +//! +//! + kSAFETY(1), + + /** + * DLA Standalone: TensorRT flow with restrictions targeting external, to TensorRT, DLA runtimes. 
+ * See DLA documentation for list of supported layers and formats. + * This flow supports only DeviceType::kDLA. + * */ + kDLA_STANDALONE(2); public final int value; - private PluginFieldType(int v) { this.value = v; } - private PluginFieldType(PluginFieldType e) { this.value = e.value; } - public PluginFieldType intern() { for (PluginFieldType e : values()) if (e.value == value) return e; return this; } + private EngineCapability(int v) { this.value = v; } + private EngineCapability(EngineCapability e) { this.value = e.value; } + public EngineCapability intern() { for (EngineCapability e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } -// Targeting ../nvinfer/PluginField.java +/** Maximum number of elements in EngineCapability enum. @see EngineCapability */ +// Targeting ../nvinfer/Weights.java -// Targeting ../nvinfer/PluginFieldCollection.java + +// Targeting ../nvinfer/IHostMemory.java /** - * \enum PluginCapabilityType + * \enum DimensionOperation * - * \brief Enumerates the different capability types a IPluginV3 object may have - * */ -@Namespace("nvinfer1") public enum PluginCapabilityType { - /** Core capability. Every IPluginV3 object must have this. */ - kCORE(0), - /** Build capability. IPluginV3 objects provided to TensorRT build phase must have this. */ - kBUILD(1), - /** Runtime capability. IPluginV3 objects provided to TensorRT build and execution phases must have this. */ - kRUNTIME(2); + * \brief An operation on two IDimensionExpr, which represent integer expressions used in dimension computations. + * + * For example, given two IDimensionExpr x and y and an IExprBuilder& eb, + * eb.operation(DimensionOperation::kSUM, x, y) creates a representation of x+y. + * + * @see IDimensionExpr, IExprBuilder + * */ +@Namespace("nvinfer1") public enum DimensionOperation { + /** Sum of the two operands. */ + kSUM(0), + /** Product of the two operands. 
*/ + kPROD(1), + /** Maximum of the two operands. */ + kMAX(2), + /** Minimum of the two operands. */ + kMIN(3), + /** Substract the second element from the first. */ + kSUB(4), + /** 1 if operands are equal, 0 otherwise. */ + kEQUAL(5), + /** 1 if first operand is less than second operand, 0 otherwise. */ + kLESS(6), + /** Floor division of the first element by the second. */ + kFLOOR_DIV(7), + /** Division rounding up */ + kCEIL_DIV(8); public final int value; - private PluginCapabilityType(int v) { this.value = v; } - private PluginCapabilityType(PluginCapabilityType e) { this.value = e.value; } - public PluginCapabilityType intern() { for (PluginCapabilityType e : values()) if (e.value == value) return e; return this; } + private DimensionOperation(int v) { this.value = v; } + private DimensionOperation(DimensionOperation e) { this.value = e.value; } + public DimensionOperation intern() { for (DimensionOperation e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } +/** Maximum number of elements in DimensionOperation enum. @see DimensionOperation */ + + /** - * \enum TensorRTPhase + * \enum TensorLocation * - * \brief Indicates a phase of operation of TensorRT + * \brief The location for tensor data storage, device or host. * */ -@Namespace("nvinfer1") public enum TensorRTPhase { - /** Build phase of TensorRT */ - kBUILD(0), - /** Execution phase of TensorRT */ - kRUNTIME(1); +@Namespace("nvinfer1") public enum TensorLocation { + /** Data stored on device. */ + kDEVICE(0), + /** Data stored on host. 
*/ + kHOST(1); public final int value; - private TensorRTPhase(int v) { this.value = v; } - private TensorRTPhase(TensorRTPhase e) { this.value = e.value; } - public TensorRTPhase intern() { for (TensorRTPhase e : values()) if (e.value == value) return e; return this; } + private TensorLocation(int v) { this.value = v; } + private TensorLocation(TensorLocation e) { this.value = e.value; } + public TensorLocation intern() { for (TensorLocation e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } -// Targeting ../nvinfer/IPluginCreatorInterface.java +/** Maximum number of elements in TensorLocation enum. @see TensorLocation */ +// Targeting ../nvinfer/IDimensionExpr.java -// Targeting ../nvinfer/IPluginCreator.java + +// Targeting ../nvinfer/IExprBuilder.java + + +// Targeting ../nvinfer/DimsExprs.java + + +// Targeting ../nvinfer/DynamicPluginTensorDesc.java + + +// Targeting ../nvinfer/IPluginV2DynamicExt.java + + +// Targeting ../nvinfer/IStreamReader.java // namespace v_1_0 /** - * \class IPluginCreatorInterface + * \class IStreamReader * - * \brief Base class for all plugin creator versions. + * \brief Application-implemented class for reading data in a stream-based manner. * - * @see IPluginCreator and IPluginRegistry + * \note To ensure compatibility of source code with future versions of TensorRT, use IStreamReader, not + * v_1_0::IStreamReader * */ + //! //! //! //! //! +//! +// Targeting ../nvinfer/IPluginResourceContext.java -/** - * \class IPluginCreator - * - * \brief Plugin creator class for user implemented layers. - * - * @see IPlugin and IPluginFactory - * - * @deprecated Deprecated in TensorRT 10.0. Please implement IPluginCreatorV3One instead along with IPluginV3 plugins - * instead. 
- * */ - // namespace nvinfer1 +// Targeting ../nvinfer/IPluginV3OneCore.java -// #endif // NV_INFER_RUNTIME_PLUGIN_H +// Targeting ../nvinfer/IPluginV3OneBuild.java -// Parsed from NvInferRuntimeCommon.h -/* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +// Targeting ../nvinfer/IPluginV3OneRuntime.java -// #ifndef NV_INFER_RUNTIME_COMMON_H + + // namespace v_1_0 + + // namespace v_2_0 + +/** + * \class IPluginV3OneCore + * + * \brief A plugin capability interface that enables the core capability (PluginCapabilityType::kCORE). + * + * @see IPluginCapability + * @see PluginCapabilityType + * @see IPluginV3::getCapabilityInterface() + * */ //! //! //! //! -//! -// #define NV_INFER_RUNTIME_COMMON_H /** - * \file NvInferRuntimeCommon.h - * - * This file provides the nvinfer1::IPluginRegistry interface, which will be moved to the NvInferRuntime.h header - * in a future release. + * \class IPluginV3OneBuild * - * \warning This file will be removed in a future release. + * \brief A plugin capability interface that enables the build capability (PluginCapabilityType::kBUILD). Exposes + * methods that allow the expression of the build time properties and behavior of a plugin. * - * \warning Do not directly include this file. 
Instead include NvInferRuntime.h + * @see IPluginCapability + * @see PluginCapabilityType + * @see IPluginV3::getCapabilityInterface() * */ -// #include "NvInferRuntimeBase.h" -// #undef NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE -// #include "NvInferRuntimePlugin.h" -// Targeting ../nvinfer/IPluginRegistry.java - - - - // namespace nvinfer1 - -// #endif /* NV_INFER_RUNTIME_COMMON_H */ - - -// Parsed from NvInferLegacyDims.h - -/* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// #ifndef NV_INFER_LEGACY_DIMS_H -// #define NV_INFER_LEGACY_DIMS_H -// #include "NvInferRuntimeBase.h" //! //! -//! - //! //! -//! -// #undef NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE /** - * \file NvInferLegacyDims.h - * - * This file contains declarations of legacy dimensions types which use channel - * semantics in their names, and declarations on which those types rely. - * -

+ * \class IPluginV3OneRuntime * - * \namespace nvinfer1 + * \brief A plugin capability interface that enables the runtime capability (PluginCapabilityType::kRUNTIME). Exposes + * methods that allow the expression of the runtime properties and behavior of a plugin. * - * \brief The TensorRT API version 1 namespace. + * @see IPluginCapability + * @see PluginCapabilityType + * @see IPluginV3::getCapabilityInterface() * */ -// Targeting ../nvinfer/Dims2.java - - -// Targeting ../nvinfer/DimsHW.java - - -// Targeting ../nvinfer/Dims3.java - - -// Targeting ../nvinfer/Dims4.java - - - - // namespace nvinfer1 - -// #endif // NV_INFER_LEGCY_DIMS_H - - -// Parsed from NvInferRuntime.h - -/* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// #ifndef NV_INFER_RUNTIME_H //! //! //! -// #define NV_INFER_RUNTIME_H +//! /** - * \file NvInferRuntime.h + * \class IPluginV3OneBuildV2 * - * This is the top-level API file for TensorRT extended runtime library. + * \brief A plugin capability interface that extends IPluginV3OneBuild by providing I/O aliasing functionality. 
+ * + * @see IPluginV3OneBuild * */ - -// #include "NvInferImpl.h" -// #include "NvInferRuntimeCommon.h" -// Targeting ../nvinfer/IPluginFactory.java - - -// Targeting ../nvinfer/INoCopy.java +// Targeting ../nvinfer/IProfiler.java + // namespace v_1_0 /** - * \enum EngineCapability + * \class IProfiler * - * \brief List of supported engine capability flows. + * \brief Application-implemented interface for profiling. * - * \details The EngineCapability determines the restrictions of a network during build time and what runtime - * it targets. When BuilderFlag::kSAFETY_SCOPE is not set (by default), EngineCapability::kSTANDARD does not provide - * any restrictions on functionality and the resulting serialized engine can be executed with TensorRT's standard - * runtime APIs in the nvinfer1 namespace. EngineCapability::kSAFETY provides a restricted subset of network - * operations that are safety certified and the resulting serialized engine can be executed with TensorRT's safe - * runtime APIs in the nvinfer1::safe namespace. EngineCapability::kDLA_STANDALONE provides a restricted subset of - * network operations that are DLA compatible and the resulting serialized engine can be executed using standalone - * DLA runtime APIs. See sampleCudla for an example of integrating cuDLA APIs with TensorRT APIs. + * When this class is added to an execution context, the profiler will be called once per layer for each invocation of + * executeV2()/enqueueV3(). + * + * It is not recommended to run inference with profiler enabled when the inference execution time is critical since the + * profiler may affect execution time negatively. * */ -@Namespace("nvinfer1") public enum EngineCapability { - /** - * Standard: TensorRT flow without targeting the safety runtime. - * This flow supports both DeviceType::kGPU and DeviceType::kDLA. - * */ - + //! //! - kSTANDARD(0), - - /** - * Safety: TensorRT flow with restrictions targeting the safety runtime. 
- * See safety documentation for list of supported layers and formats. - * This flow supports only DeviceType::kGPU. - * - * This flag is only supported in NVIDIA Drive(R) products. */ - - //! //! - kSAFETY(1), - - /** - * DLA Standalone: TensorRT flow with restrictions targeting external, to TensorRT, DLA runtimes. - * See DLA documentation for list of supported layers and formats. - * This flow supports only DeviceType::kDLA. - * */ - kDLA_STANDALONE(2); - - public final int value; - private EngineCapability(int v) { this.value = v; } - private EngineCapability(EngineCapability e) { this.value = e.value; } - public EngineCapability intern() { for (EngineCapability e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in EngineCapability enum. @see EngineCapability */ - -// Targeting ../nvinfer/Weights.java - - -// Targeting ../nvinfer/IHostMemory.java - - /** - * \enum DimensionOperation - * - * \brief An operation on two IDimensionExpr, which represent integer expressions used in dimension computations. + * \enum WeightsRole * - * For example, given two IDimensionExpr x and y and an IExprBuilder& eb, - * eb.operation(DimensionOperation::kSUM, x, y) creates a representation of x+y. + * \brief How a layer uses particular Weights. * - * @see IDimensionExpr, IExprBuilder + * The power weights of an IScaleLayer are omitted. Refitting those is not supported. * */ -@Namespace("nvinfer1") public enum DimensionOperation { - /** Sum of the two operands. */ - kSUM(0), - /** Product of the two operands. */ - kPROD(1), - /** Maximum of the two operands. */ - kMAX(2), - /** Minimum of the two operands. */ - kMIN(3), - /** Substract the second element from the first. */ - kSUB(4), - /** 1 if operands are equal, 0 otherwise. */ - kEQUAL(5), - /** 1 if first operand is less than second operand, 0 otherwise. 
*/ - kLESS(6), - /** Floor division of the first element by the second. */ - kFLOOR_DIV(7), - /** Division rounding up */ - kCEIL_DIV(8); +@Namespace("nvinfer1") public enum WeightsRole { + /** kernel for IConvolutionLayer or IDeconvolutionLayer */ + kKERNEL(0), + /** bias for IConvolutionLayer or IDeconvolutionLayer */ + kBIAS(1), + /** shift part of IScaleLayer */ + kSHIFT(2), + /** scale part of IScaleLayer */ + kSCALE(3), + /** weights for IConstantLayer */ + kCONSTANT(4), + /** Any other weights role */ + kANY(5); public final int value; - private DimensionOperation(int v) { this.value = v; } - private DimensionOperation(DimensionOperation e) { this.value = e.value; } - public DimensionOperation intern() { for (DimensionOperation e : values()) if (e.value == value) return e; return this; } + private WeightsRole(int v) { this.value = v; } + private WeightsRole(WeightsRole e) { this.value = e.value; } + public WeightsRole intern() { for (WeightsRole e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } -/** Maximum number of elements in DimensionOperation enum. @see DimensionOperation */ +/** Maximum number of elements in WeightsRole enum. @see WeightsRole */ /** - * \enum TensorLocation + * \enum DeviceType + * \brief The device that this layer/network will execute on. * - * \brief The location for tensor data storage, device or host. * */ -@Namespace("nvinfer1") public enum TensorLocation { - /** Data stored on device. */ - kDEVICE(0), - /** Data stored on host. 
*/ - kHOST(1); +@Namespace("nvinfer1") public enum DeviceType { + /** GPU Device */ + kGPU(0), + /** DLA Core */ + kDLA(1); public final int value; - private TensorLocation(int v) { this.value = v; } - private TensorLocation(TensorLocation e) { this.value = e.value; } - public TensorLocation intern() { for (TensorLocation e : values()) if (e.value == value) return e; return this; } + private DeviceType(int v) { this.value = v; } + private DeviceType(DeviceType e) { this.value = e.value; } + public DeviceType intern() { for (DeviceType e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } -/** Maximum number of elements in TensorLocation enum. @see TensorLocation */ - -// Targeting ../nvinfer/IDimensionExpr.java - - -// Targeting ../nvinfer/IExprBuilder.java - - -// Targeting ../nvinfer/DimsExprs.java - - -// Targeting ../nvinfer/DynamicPluginTensorDesc.java - - -// Targeting ../nvinfer/IPluginV2DynamicExt.java - - -// Targeting ../nvinfer/IPluginResourceContext.java - - -// Targeting ../nvinfer/IPluginCapability.java - - - // namespace v_1_0 - -/** - * \class IPluginCapability - * - * \brief Base class for plugin capability interfaces - * - * IPluginCapability represents a split in TensorRT V3 plugins to sub-objects that expose different types of - * capabilites a plugin may have, as opposed to a single interface which defines all capabilities and behaviors of a - * plugin. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * - * @see PluginCapabilityType - * */ -// Targeting ../nvinfer/IPluginV3.java - +/** Maximum number of elements in DeviceType enum. @see DeviceType */ - // namespace v_1_0 /** - * \class IPluginV3 - * - * \brief Plugin class for the V3 generation of user-implemented layers. 
+ * \enum TempfileControlFlag * - * IPluginV3 acts as a wrapper around the plugin capability interfaces that define the actual behavior of the plugin. + * \brief Flags used to control TensorRT's behavior when creating executable temporary files. * - * @see IPluginCapability - * @see IPluginCreatorV3One - * @see IPluginRegistry + * On some platforms the TensorRT runtime may need to create files in a temporary directory or use platform-specific + * APIs to create files in-memory to load temporary DLLs that implement runtime code. These flags allow the + * application to explicitly control TensorRT's use of these files. This will preclude the use of certain TensorRT + * APIs for deserializing and loading lean runtimes. * */ -// Targeting ../nvinfer/IPluginV3OneCore.java - - -// Targeting ../nvinfer/IPluginV3OneBuild.java - +@Namespace("nvinfer1") public enum TempfileControlFlag { + /** Allow creating and loading files in-memory (or unnamed files). */ + +//! + kALLOW_IN_MEMORY_FILES(0), -// Targeting ../nvinfer/IPluginV3OneRuntime.java + /** Allow creating and loading named files in a temporary directory on the filesystem. + * + * @see IRuntime::setTemporaryDirectory() */ + kALLOW_TEMPORARY_FILES(1); + public final int value; + private TempfileControlFlag(int v) { this.value = v; } + private TempfileControlFlag(TempfileControlFlag e) { this.value = e.value; } + public TempfileControlFlag intern() { for (TempfileControlFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} - // namespace v_1_0 +/** Maximum number of elements in TempfileControlFlag enum. @see TempfileControlFlag */ - // namespace v_2_0 /** - * \class IPluginV3OneCore - * - * \brief A plugin capability interface that enables the core capability (PluginCapabilityType::kCORE). + * \brief Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operations. 
* - * @see IPluginCapability - * @see PluginCapabilityType - * @see IPluginV3::getCapabilityInterface() - * */ + * @see TempfileControlFlag, + * IRuntime::setTempfileControlFlags(), + * IRuntime::getTempfileControlFlags() */ //! //! //! //! - -/** - * \class IPluginV3OneBuild - * - * \brief A plugin capability interface that enables the build capability (PluginCapabilityType::kBUILD). Exposes - * methods that allow the expression of the build time properties and behavior of a plugin. - * - * @see IPluginCapability - * @see PluginCapabilityType - * @see IPluginV3::getCapabilityInterface() - * */ - - +//! +//! //! //! //! //! /** - * \class IPluginV3OneRuntime + * \enum TensorFormat * - * \brief A plugin capability interface that enables the runtime capability (PluginCapabilityType::kRUNTIME). Exposes - * methods that allow the expression of the runtime properties and behavior of a plugin. + * \brief Format of the input/output tensors. * - * @see IPluginCapability - * @see PluginCapabilityType - * @see IPluginV3::getCapabilityInterface() + * This enum is used by both plugins and network I/O tensors. + * + * @see IPluginV2::supportsFormat(), safe::ICudaEngine::getBindingFormat() + * + * Many of the formats are **vector-major** or **vector-minor**. These formats specify + * a vector dimension and scalars per vector. + * For example, suppose that the tensor has dimensions [M,N,C,H,W], + * the vector dimension is C and there are V scalars per vector. + * + * * A **vector-major** format splits the vectorized dimension into two axes in the + * memory layout. The vectorized dimension is replaced by an axis of length ceil(C/V) + * and a new dimension of length V is appended. For the example tensor, the memory layout + * is equivalent to an array with dimensions [M][N][ceil(C/V)][H][W][V]. + * Tensor coordinate (m,n,c,h,w) maps to array location [m][n][c/V][h][w][c\%V].
+ * + * * A **vector-minor** format moves the vectorized dimension to become the last axis + * in the memory layout. For the example tensor, the memory layout is equivalent to an + * array with dimensions [M][N][H][W][ceil(C/V)*V]. Tensor coordinate (m,n,c,h,w) maps + * array location subscript [m][n][h][w][c]. + * + * In interfaces that refer to "components per element", that's the value of V above. + * + * For more information about data formats, see the topic "Data Format Description" located in the + * TensorRT Developer Guide. https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#data-format-desc * */ +@Namespace("nvinfer1") public enum TensorFormat { + /** Memory layout is similar to an array in C or C++. + * The stride of each dimension is the product of the dimensions after it. + * The last dimension has unit stride. + * + * For DLA usage, the tensor sizes are limited to C,H,W in the range [1,8192]. */ + +//! + kLINEAR(0), + /** Vector-major format with two scalars per vector. + * Vector dimension is third to last. + * + * This format requires FP16 and at least three dimensions. */ + kCHW2(1), + /** Vector-minor format with eight scalars per vector. + * Vector dimension is third to last. + * This format requires FP16 or BF16 and at least three dimensions. */ + //! //! //! -//! - -/** - * \class IPluginV3OneBuildV2 - * - * \brief A plugin capability interface that extends IPluginV3OneBuild by providing I/O aliasing functionality. - * - * @see IPluginV3OneBuild - * */ -// Targeting ../nvinfer/IPluginCreatorV3One.java - - - // namespace v_1_0 - -/** - * \class IPluginCreatorV3One - * - * \brief A plugin creator class capable of producing IPluginV3 objects - * - * @see IPluginV3 - * @see IPluginRegistry - * */ -// Targeting ../nvinfer/IProfiler.java + kHWC8(2), + /** Vector-major format with four scalars per vector. + * Vector dimension is third to last. + * + * This format requires INT8 or FP16 and at least three dimensions. 
+ * For INT8, the length of the vector dimension must be a build-time constant. + * + * Deprecated usage: + * + * If running on the DLA, this format can be used for acceleration + * with the caveat that C must be less than or equal to 4. + * If used as DLA input and the build option kGPU_FALLBACK is not specified, + * it needs to meet line stride requirement of DLA format. Column stride in + * bytes must be a multiple of 64 on Orin. */ + +//! +//! + kCHW4(3), - // namespace v_1_0 + /** Vector-major format with 16 scalars per vector. + * Vector dimension is third to last. + * + * This format requires FP16 and at least three dimensions. + * + * For DLA usage, this format maps to the native feature format for FP16, + * and the tensor sizes are limited to C,H,W in the range [1,8192]. */ + +//! +//! + kCHW16(4), -/** - * \class IProfiler - * - * \brief Application-implemented interface for profiling. - * - * When this class is added to an execution context, the profiler will be called once per layer for each invocation of - * executeV2()/enqueueV3(). - * - * It is not recommended to run inference with profiler enabled when the inference execution time is critical since the - * profiler may affect execution time negatively. - * */ + /** Vector-major format with 32 scalars per vector. + * Vector dimension is third to last. + * + * This format requires at least three dimensions. + * + * For DLA usage, this format maps to the native feature format for INT8, + * and the tensor sizes are limited to C,H,W in the range [1,8192]. */ + +//! + kCHW32(5), + /** Vector-minor format with eight scalars per vector. + * Vector dimension is fourth to last. + * + * This format requires FP16 or BF16 and at least four dimensions. */ + +//! + kDHWC8(6), + /** Vector-major format with 32 scalars per vector. + * Vector dimension is fourth to last. + * + * This format requires FP16 or INT8 and at least four dimensions. */ + //! 
+ kCDHW32(7), + + /** Vector-minor format where channel dimension is third to last and unpadded. + * + * This format requires either FP32, FP16, UINT8, INT64 or BF16 and at least three dimensions. */ + //! + kHWC(8), + + /** DLA planar format. For a tensor with dimension {N, C, H, W}, the W axis + * always has unit stride. The stride for stepping along the H axis is + * rounded up to 64 bytes. + * + * The memory layout is equivalent to a C array with dimensions + * [N][C][H][roundUp(W, 64/elementSize)] where elementSize is + * 2 for FP16 and 1 for Int8, with the tensor coordinates (n, c, h, w) + * mapping to array subscript [n][c][h][w]. */ + //! + kDLA_LINEAR(9), + + /** DLA image format. For a tensor with dimension {N, C, H, W} the C axis + * always has unit stride. The stride for stepping along the H axis is rounded up + * to 64 bytes on Orin. C can only be 1, 3 or 4. + * If C == 1, it will map to grayscale format. + * If C == 3 or C == 4, it will map to color image format. And if C == 3, + * the stride for stepping along the W axis needs to be padded to 4 in elements. + * + * When C is {1, 3, 4}, then C' is {1, 4, 4} respectively, + * the memory layout is equivalent to a C array with dimensions + * [N][H][roundUp(W, 64/C'/elementSize)][C'] on Orin + * where elementSize is 2 for FP16 + * and 1 for Int8. The tensor coordinates (n, c, h, w) mapping to array + * subscript [n][h][w][c]. */ + //! + kDLA_HWC4(10), -/** - * \enum WeightsRole - * - * \brief How a layer uses particular Weights. - * - * The power weights of an IScaleLayer are omitted. Refitting those is not supported. 
- * */ -@Namespace("nvinfer1") public enum WeightsRole { - /** kernel for IConvolutionLayer or IDeconvolutionLayer */ - kKERNEL(0), - /** bias for IConvolutionLayer or IDeconvolutionLayer */ - kBIAS(1), - /** shift part of IScaleLayer */ - kSHIFT(2), - /** scale part of IScaleLayer */ - kSCALE(3), - /** weights for IConstantLayer */ - kCONSTANT(4), - /** Any other weights role */ - kANY(5); + /** Vector-minor format with 16 scalars per vector. + * Vector dimension is third to last. + * + * This requires FP16 or INT8 and at least three dimensions. */ + +//! + kHWC16(11), + + /** Vector-minor format with one scalar per vector. + * Vector dimension is fourth to last. + * + * This format requires FP32 and at least four dimensions. */ + kDHWC(12); public final int value; - private WeightsRole(int v) { this.value = v; } - private WeightsRole(WeightsRole e) { this.value = e.value; } - public WeightsRole intern() { for (WeightsRole e : values()) if (e.value == value) return e; return this; } + private TensorFormat(int v) { this.value = v; } + private TensorFormat(TensorFormat e) { this.value = e.value; } + public TensorFormat intern() { for (TensorFormat e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } - -/** Maximum number of elements in WeightsRole enum. @see WeightsRole */ - +/** Maximum number of elements in TensorFormat enum. @see TensorFormat */ + // namespace impl /** - * \enum DeviceType - * \brief The device that this layer/network will execute on. + * \enum AllocatorFlag * + * \brief Allowed type of memory allocation. * */ -@Namespace("nvinfer1") public enum DeviceType { - /** GPU Device */ - kGPU(0), - /** DLA Core */ - kDLA(1); +@Namespace("nvinfer1") public enum AllocatorFlag { + /** TensorRT may call realloc() on this allocation. 
*/ + kRESIZABLE(0); public final int value; - private DeviceType(int v) { this.value = v; } - private DeviceType(DeviceType e) { this.value = e.value; } - public DeviceType intern() { for (DeviceType e : values()) if (e.value == value) return e; return this; } + private AllocatorFlag(int v) { this.value = v; } + private AllocatorFlag(AllocatorFlag e) { this.value = e.value; } + public AllocatorFlag intern() { for (AllocatorFlag e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } +/** Maximum number of elements in AllocatorFlag enum. @see AllocatorFlag */ + // namespace impl -/** Maximum number of elements in DeviceType enum. @see DeviceType */ -/** - * \enum TempfileControlFlag - * - * \brief Flags used to control TensorRT's behavior when creating executable temporary files. - * - * On some platforms the TensorRT runtime may need to create files in a temporary directory or use platform-specific - * APIs to create files in-memory to load temporary DLLs that implement runtime code. These flags allow the - * application to explicitly control TensorRT's use of these files. This will preclude the use of certain TensorRT - * APIs for deserializing and loading lean runtimes. - * */ -@Namespace("nvinfer1") public enum TempfileControlFlag { - /** Allow creating and loading files in-memory (or unnamed files). */ - //! - kALLOW_IN_MEMORY_FILES(0), +//! +//! +//! +//! +// Targeting ../nvinfer/ILogger.java - /** Allow creating and loading named files in a temporary directory on the filesystem. 
- * - * @see IRuntime::setTemporaryDirectory() */ - kALLOW_TEMPORARY_FILES(1); - public final int value; - private TempfileControlFlag(int v) { this.value = v; } - private TempfileControlFlag(TempfileControlFlag e) { this.value = e.value; } - public TempfileControlFlag intern() { for (TempfileControlFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** Maximum number of elements in ILogger::Severity enum. @see ILogger::Severity */ + +// Targeting ../nvinfer/IGpuAllocator.java -/** Maximum number of elements in TempfileControlFlag enum. @see TempfileControlFlag */ + // namespace v_1_0 + /** - * \brief Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operations. + * \class IGpuAllocator * - * @see TempfileControlFlag, - * IRuntime::setTempfileControlFlags(), - * IRuntime::getTempfileControlFlags() */ + * \brief Application-implemented class for controlling allocation on the GPU. + * + * \warning The lifetime of an IGpuAllocator object must exceed that of all objects that use it. + * + * This class is intended as a base class for allocators that implement synchronous allocation. + * If you want the benefits of asynchronous allocation, you can do either of: + * + * * Derive your class from IGpuAllocator and override all four of its virtual methods + * for allocation/deallocation, including the two deprecated methods. + * + * * Derive your class from IGpuAsyncAllocator and override its two pure virtual + * methods for allocation/deallocation. + * + * The latter style is preferred because it does not tie code to deprecated methods. + * + * @see IGpuAsyncAllocator. + * */ //! @@ -1904,7 +1600,6 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { * including two deprecated methods. 
* * @see IGpuAllocator */ - // namespace nvinfer1 /** @@ -3566,7 +3261,20 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { * control over which weights are refittable or not using INetworkDefinition::markWeightsRefittable and * INetworkDefinition::unmarkWeightsRefittable. By default, all weights are non-refittable when this flag is * enabled. This flag cannot be used together with kREFIT or kREFIT_IDENTICAL. */ - kREFIT_INDIVIDUAL(23); + kREFIT_INDIVIDUAL(23), + + /** Disable floating-point optimizations: 0*x => 0, x-x => 0, or x/x => 1. These identities are + * not true when x is a NaN or Inf, and thus might hide propagation or generation of NaNs. This flag is typically + * used in combination with kSPARSE_WEIGHTS. + * There are three valid sparsity configurations. + * 1. Disable all sparsity. Both kSPARSE_WEIGHTS and kSTRICT_NANS are unset + * 2. Enable sparsity only where it does not affect propagation/generation of NaNs. Both kSPARSE_WEIGHTS and + * kSTRICT_NANS are set + * 3. Enable all sparsity. kSPARSE_WEIGHTS is set and kSTRICT_NANS is unset */ + kSTRICT_NANS(24), + + /** Enable memory monitor during build time. 
*/ + kMONITOR_MEMORY(25); public final int value; private BuilderFlag(int v) { this.value = v; } @@ -3944,6 +3652,7 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { // namespace v_1_0 + // namespace v_1_0 /** enum class nvinfer1::ActivationType */ ; @@ -4156,103 +3865,466 @@ public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { // Targeting ../nvinfer/VRecurrenceLayer.java -// Targeting ../nvinfer/VLoopOutputLayer.java +// Targeting ../nvinfer/VLoopOutputLayer.java + + +// Targeting ../nvinfer/VTripLimitLayer.java + + +// Targeting ../nvinfer/VIteratorLayer.java + + +// Targeting ../nvinfer/VLoop.java + + +// Targeting ../nvinfer/VConditionalBoundaryLayer.java + + +// Targeting ../nvinfer/VConditionLayer.java + + +// Targeting ../nvinfer/VConditionalInputLayer.java + + +// Targeting ../nvinfer/VConditionalOutputLayer.java + + +// Targeting ../nvinfer/VIfConditional.java + + +// Targeting ../nvinfer/VSelectLayer.java + + +// Targeting ../nvinfer/VAssertionLayer.java + + +// Targeting ../nvinfer/VFillLayer.java + + +// Targeting ../nvinfer/VQuantizeLayer.java + + +// Targeting ../nvinfer/VDequantizeLayer.java + + +// Targeting ../nvinfer/VScatterLayer.java + + +// Targeting ../nvinfer/VEinsumLayer.java + + +// Targeting ../nvinfer/VOneHotLayer.java + + +// Targeting ../nvinfer/VGridSampleLayer.java + + +// Targeting ../nvinfer/VNMSLayer.java + + +// Targeting ../nvinfer/VReverseSequenceLayer.java + + +// Targeting ../nvinfer/VNormalizationLayer.java + + +// Targeting ../nvinfer/VNetworkDefinition.java + + +// Targeting ../nvinfer/VAlgorithmIOInfo.java + + +// Targeting ../nvinfer/VAlgorithmVariant.java + + +// Targeting ../nvinfer/VAlgorithmContext.java + + +// Targeting ../nvinfer/VAlgorithm.java + + +// Targeting ../nvinfer/VTimingCache.java + + +// Targeting ../nvinfer/VBuilderConfig.java + + +// Targeting ../nvinfer/VSerializationConfig.java + + +// Targeting ../nvinfer/VBuilder.java + + + + // namespace apiv + // namespace 
nvinfer1 + +// @endcond + +// #endif // NV_INFER_RUNTIME_IMPL_H + + +// Parsed from NvInferPluginBase.h + +/* +* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +* SPDX-License-Identifier: LicenseRef-NvidiaProprietary +* +* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +* property and proprietary rights in and to this material, related +* documentation and any modifications thereto. Any use, reproduction, +* disclosure or distribution of this material and related documentation +* without an express license agreement from NVIDIA CORPORATION or +* its affiliates is strictly prohibited. +*/ + +// #ifndef NV_INFER_PLUGIN_BASE_H +// #define NV_INFER_PLUGIN_BASE_H + +// #if !defined(NV_INFER_INTERNAL_INCLUDE) +// #endif +// #include "NvInferRuntimeBase.h" +// #undef NV_INFER_INTERNAL_INCLUDE + +/** + * \enum PluginFieldType + * + * \brief The possible field types for custom layer. + * */ +@Namespace("nvinfer1") public enum PluginFieldType { + /** FP16 field type. */ + kFLOAT16(0), + /** FP32 field type. */ + kFLOAT32(1), + /** FP64 field type. */ + kFLOAT64(2), + /** INT8 field type. */ + kINT8(3), + /** INT16 field type. */ + kINT16(4), + /** INT32 field type. */ + kINT32(5), + /** char field type. */ + kCHAR(6), + /** nvinfer1::Dims field type. */ + kDIMS(7), + /** Unknown field type. */ + kUNKNOWN(8), + /** BF16 field type. */ + kBF16(9), + /** INT64 field type. */ + kINT64(10), + /** FP8 field type. */ + kFP8(11), + /** INT4 field type. 
*/ + kINT4(12); + + public final int value; + private PluginFieldType(int v) { this.value = v; } + private PluginFieldType(PluginFieldType e) { this.value = e.value; } + public PluginFieldType intern() { for (PluginFieldType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +// Targeting ../nvinfer/PluginField.java + + +// Targeting ../nvinfer/PluginFieldCollection.java + + + +/** + * \enum TensorRTPhase + * + * \brief Indicates a phase of operation of TensorRT + * */ +@Namespace("nvinfer1") public enum TensorRTPhase { + /** Build phase of TensorRT */ + kBUILD(0), + /** Execution phase of TensorRT */ + kRUNTIME(1); + + public final int value; + private TensorRTPhase(int v) { this.value = v; } + private TensorRTPhase(TensorRTPhase e) { this.value = e.value; } + public TensorRTPhase intern() { for (TensorRTPhase e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** + * \enum PluginCapabilityType + * + * \brief Enumerates the different capability types a IPluginV3 object may have + * */ +@Namespace("nvinfer1") public enum PluginCapabilityType { + /** Core capability. Every IPluginV3 object must have this. */ + kCORE(0), + /** Build capability. IPluginV3 objects provided to TensorRT build phase must have this. */ + kBUILD(1), + /** Runtime capability. IPluginV3 objects provided to TensorRT build and execution phases must have this. 
*/ + kRUNTIME(2); -// Targeting ../nvinfer/VTripLimitLayer.java + public final int value; + private PluginCapabilityType(int v) { this.value = v; } + private PluginCapabilityType(PluginCapabilityType e) { this.value = e.value; } + public PluginCapabilityType intern() { for (PluginCapabilityType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +// Targeting ../nvinfer/IPluginCapability.java -// Targeting ../nvinfer/VIteratorLayer.java +// Targeting ../nvinfer/IPluginResource.java -// Targeting ../nvinfer/VLoop.java +// Targeting ../nvinfer/IPluginCreatorInterface.java -// Targeting ../nvinfer/VConditionalBoundaryLayer.java +// Targeting ../nvinfer/IPluginV3.java -// Targeting ../nvinfer/VConditionLayer.java +// Targeting ../nvinfer/IPluginCreatorV3One.java -// Targeting ../nvinfer/VConditionalInputLayer.java + // namespace v_1_0 -// Targeting ../nvinfer/VConditionalOutputLayer.java +/** + * \class IPluginCreatorV3One + * + * \brief A plugin creator class capable of producing IPluginV3 objects + * + * @see IPluginV3 + * @see IPluginRegistry + * */ -// Targeting ../nvinfer/VIfConditional.java +//! +//! +//! +//! +/** + * \class IPluginResource + * + * \brief Interface for plugins to define custom resources that could be shared through the plugin registry + * + * @see IPluginRegistry::acquirePluginResource + * @see IPluginRegistry::releasePluginResource + * */ -// Targeting ../nvinfer/VSelectLayer.java +//! +//! +//! +//! -// Targeting ../nvinfer/VAssertionLayer.java +/** + * \class IPluginCreatorInterface + * + * \brief Base class for all plugin creator versions. + * + * @see IPluginCreator and IPluginRegistry + * */ -// Targeting ../nvinfer/VFillLayer.java +//! +//! +//! +//! +//! +/** + * \class IPluginV3 + * + * \brief Plugin class for the V3 generation of user-implemented layers. 
+ * + * IPluginV3 acts as a wrapper around the plugin capability interfaces that define the actual behavior of the plugin. + * + * @see IPluginCapability + * @see IPluginCreatorV3One + * @see IPluginRegistry + * */ -// Targeting ../nvinfer/VQuantizeLayer.java +//! +//! +//! +//! +//! +//! -// Targeting ../nvinfer/VDequantizeLayer.java +/** + * \class IPluginCapability + * + * \brief Base class for plugin capability interfaces + * + * IPluginCapability represents a split in TensorRT V3 plugins to sub-objects that expose different types of + * capabilities a plugin may have, as opposed to a single interface which defines all capabilities and behaviors of a + * plugin. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * + * @see PluginCapabilityType + * */ + // namespace nvinfer1 +// #endif /* NV_INFER_PLUGIN_BASE_H */ -// Targeting ../nvinfer/VScatterLayer.java +// Parsed from NvInferRuntimePlugin.h -// Targeting ../nvinfer/VEinsumLayer.java +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// #ifndef NV_INFER_RUNTIME_PLUGIN_H +// #define NV_INFER_RUNTIME_PLUGIN_H +// #include "NvInferPluginBase.h" -// Targeting ../nvinfer/VOneHotLayer.java +//! +//! +//! +//! -// Targeting ../nvinfer/VGridSampleLayer.java +//! +//! +//!
+// #undef NV_INFER_INTERNAL_INCLUDE +/** + * \file NvInferRuntimePlugin.h + * + * This file contains common definitions, data structures and interfaces that relate to plugins and are shared + * between the standard and safe runtime. + * + * \warning Do not directly include this file. Instead include NvInferRuntime.h + * +

+ * + * \namespace nvinfer1 + * + * \brief The TensorRT API version 1 namespace. + * */ -// Targeting ../nvinfer/VNMSLayer.java +/** enum class nvinfer1::TensorFormat */ +; -// Targeting ../nvinfer/VReverseSequenceLayer.java +//! +//! +//! -// Targeting ../nvinfer/VNormalizationLayer.java +/** + * \brief PluginFormat is reserved for backward compatibility. + * + * @see IPluginV2::supportsFormat() + * */ -// Targeting ../nvinfer/VNetworkDefinition.java +//! +//! +/** + * \brief Bit at the plugin version to identify that it is a plugin. + * */ -// Targeting ../nvinfer/VAlgorithmIOInfo.java +//! +//! +//! +//! +//! +@Namespace("nvinfer1") @MemberGetter public static native int kPLUGIN_VERSION_PYTHON_BIT(); +public static final int kPLUGIN_VERSION_PYTHON_BIT = kPLUGIN_VERSION_PYTHON_BIT(); +// Targeting ../nvinfer/PluginTensorDesc.java -// Targeting ../nvinfer/VAlgorithmVariant.java -// Targeting ../nvinfer/VAlgorithmContext.java +/** + * \struct PluginVersion + * + * \brief Definition of plugin versions. + * + * Tag for plug-in versions. Used in upper byte of getTensorRTVersion(). + * */ +@Namespace("nvinfer1") public enum PluginVersion { + /** IPluginV2 */ + kV2((byte)(0)), + /** IPluginV2Ext */ + kV2_EXT((byte)(1)), + /** IPluginV2IOExt */ + kV2_IOEXT((byte)(2)), + /** IPluginV2DynamicExt */ + kV2_DYNAMICEXT((byte)(3)), + /** IPluginV2DynamicExt-based Python plugins */ + kV2_DYNAMICEXT_PYTHON((byte)(kPLUGIN_VERSION_PYTHON_BIT | 3)); + public final byte value; + private PluginVersion(byte v) { this.value = v; } + private PluginVersion(PluginVersion e) { this.value = e.value; } + public PluginVersion intern() { for (PluginVersion e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} -// Targeting ../nvinfer/VAlgorithm.java +/** + * \enum PluginCreatorVersion + * + * \brief Enum to identify version of the plugin creator. 
+ * */ +@Namespace("nvinfer1") public enum PluginCreatorVersion { + /** IPluginCreator */ + kV1(0), + /** IPluginCreator-based Python plugin creators */ + kV1_PYTHON(kPLUGIN_VERSION_PYTHON_BIT); + public final int value; + private PluginCreatorVersion(int v) { this.value = v; } + private PluginCreatorVersion(PluginCreatorVersion e) { this.value = e.value; } + public PluginCreatorVersion intern() { for (PluginCreatorVersion e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +// Targeting ../nvinfer/IPluginV2.java -// Targeting ../nvinfer/VTimingCache.java +// Targeting ../nvinfer/IPluginV2Ext.java -// Targeting ../nvinfer/VBuilderConfig.java +// Targeting ../nvinfer/IPluginV2IOExt.java -// Targeting ../nvinfer/VSerializationConfig.java +// Targeting ../nvinfer/IPluginCreator.java -// Targeting ../nvinfer/VBuilder.java + // namespace v_1_0 +/** + * \class IPluginCreator + * + * \brief Plugin creator class for user implemented layers. + * + * @see IPlugin and IPluginFactory + * + * @deprecated Deprecated in TensorRT 10.0. Please implement IPluginCreatorV3One along with IPluginV3 plugins + * instead. + * */ - // namespace apiv // namespace nvinfer1 -// @endcond - -// #endif // NV_INFER_RUNTIME_IMPL_H +// #endif // NV_INFER_RUNTIME_PLUGIN_H } diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java index f27e1a31252..06ffe336b25 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java @@ -250,7 +250,7 @@ public class IBuilder extends INoCopy { * * @return A pointer to a IHostMemory object that contains a serialized network. * - * \note This function will synchronize the cuda stream returned by \p config.getProfileStream() before returning. 
+ * \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning. * * @see INetworkDefinition, IBuilderConfig, IHostMemory * */ @@ -262,8 +262,34 @@ public class IBuilder extends INoCopy { //! //! //! + //! public native @NoException(true) IHostMemory buildSerializedNetwork(@ByRef INetworkDefinition network, @ByRef IBuilderConfig config); + /** + * \brief Builds a network for the given INetworkDefinition and IBuilderConfig. + * + * @param network Network definition. + * @param config Builder configuration. + * + * @return A pointer to a ICudaEngine object that contains an engine. + * + * \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning. + * + * \note This function does not support \p BuilderFlag::kVERSION_COMPATIBLE. + * Please use \p buildSerializedNetwork to get a version compatible engine. + * + * @see INetworkDefinition, IBuilderConfig, ICudaEngine + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) ICudaEngine buildEngineWithConfig(@ByRef INetworkDefinition network, @ByRef IBuilderConfig config); + /** * \brief Checks that a network is within the scope of the IBuilderConfig settings. * @@ -279,7 +305,7 @@ public class IBuilder extends INoCopy { * @return True if network is within the scope of the restrictions specified by the builder config, * false otherwise. * - * \note This function will synchronize the cuda stream returned by \p config.getProfileStream() before returning. + * \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning. 
* */ diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java index 6a0da55a285..38d173b53e9 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java @@ -365,9 +365,9 @@ public class IBuilderConfig extends INoCopy { public native @NoException(true) void reset(); /** - * \brief Set the cuda stream that is used to profile this network. + * \brief Set the CUDA stream that is used to profile this network. * - * @param stream The cuda stream used for profiling by the builder. + * @param stream The CUDA stream used for profiling by the builder. * * @see getProfileStream() * */ @@ -380,9 +380,9 @@ public class IBuilderConfig extends INoCopy { public native @NoException(true) void setProfileStream(CUstream_st stream); /** - * \brief Get the cuda stream that is used to profile this network. + * \brief Get the CUDA stream that is used to profile this network. * - * @return The cuda stream set by setProfileStream, nullptr if setProfileStream has not been called. + * @return The CUDA stream set by setProfileStream, nullptr if setProfileStream has not been called. * * @see setProfileStream() * */ diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IDebugListener.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IDebugListener.java index 958d98cedf3..256b6b2d876 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IDebugListener.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IDebugListener.java @@ -44,7 +44,7 @@ public class IDebugListener extends IVersionedInterface { * @param type data Type of the tensor. * @param shape shape of the tensor. * @param name name of the tensor. - * @param stream Cuda stream object. + * @param stream CUDA stream object. * * @return True on success, false otherwise. 
* */ diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java index c0d00e3ab67..d643b2e5038 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java @@ -148,7 +148,7 @@ public class IExecutionContext extends INoCopy { /** * \brief Set the device memory for use by this execution context. * - * The memory must be aligned with cuda memory alignment property (using cudaGetDeviceProperties()), and its size + * The memory must be aligned with CUDA memory alignment property (using cudaGetDeviceProperties()), and its size * must be large enough for performing inference with the given network inputs. getDeviceMemorySize() and * getDeviceMemorySizeForProfile() report upper bounds of the size. Setting memory to nullptr is acceptable if the * reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of @@ -178,7 +178,7 @@ public class IExecutionContext extends INoCopy { /** * \brief Set the device memory and its corresponding size for use by this execution context. * - * The memory must be aligned with cuda memory alignment property (using cudaGetDeviceProperties()), and its size + * The memory must be aligned with CUDA memory alignment property (using cudaGetDeviceProperties()), and its size * must be large enough for performing inference with the given network inputs. getDeviceMemorySize() and * getDeviceMemorySizeForProfile() report upper bounds of the size. Setting memory to nullptr is acceptable if the * reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of @@ -431,7 +431,7 @@ public class IExecutionContext extends INoCopy { * @param profileIndex Index of the profile. 
The value must lie between 0 and * getEngine().getNbOptimizationProfiles() - 1 * - * @param stream A cuda stream on which the cudaMemcpyAsyncs may be + * @param stream A CUDA stream on which the cudaMemcpyAsyncs may be * enqueued * * When an optimization profile is switched via this API, TensorRT may @@ -778,7 +778,7 @@ public class IExecutionContext extends INoCopy { /** * \brief Mark input as consumed. * - * @param event The cuda event that is triggered after all input tensors have been consumed. + * @param event The CUDA event that is triggered after all input tensors have been consumed. * * \warning The set event must be valid during the inferece. * @@ -796,7 +796,7 @@ public class IExecutionContext extends INoCopy { /** * \brief The event associated with consuming the input. * - * @return The cuda event. Nullptr will be returned if the event is not set yet. + * @return The CUDA event. Nullptr will be returned if the event is not set yet. * */ @@ -915,7 +915,7 @@ public class IExecutionContext extends INoCopy { /** * \brief Enqueue inference on a stream. * - * @param stream A cuda stream on which the inference kernels will be enqueued. + * @param stream A CUDA stream on which the inference kernels will be enqueued. * * @return True if the kernels were enqueued successfully, false otherwise. * diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java index d3ff014add6..d1952b6495a 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java @@ -17,10 +17,7 @@ import static org.bytedeco.cuda.global.nvrtc.*; import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** DO NOT REFER TO namespace v_1_0 IN CODE. ALWAYS USE nvinfer1 INSTEAD. - * The name v_1_0 may change in future versions of TensoRT. 
*/ + // namespace impl @Namespace("nvinfer1::v_1_0") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) public class IGpuAllocator extends IVersionedInterface { diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILogger.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILogger.java index db186e21cd7..e6cc3ca4289 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILogger.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/ILogger.java @@ -19,7 +19,10 @@ import static org.bytedeco.tensorrt.global.nvinfer.*; -/** +/** DO NOT REFER TO namespace v_1_0 IN CODE. ALWAYS USE nvinfer1 INSTEAD. + * The name v_1_0 may change in future versions of TensorRT. +

+ * * \class ILogger * * \brief Application-implemented logging interface for the builder, refitter and runtime. diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java index 02820857d29..112db030bd0 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java @@ -18,7 +18,6 @@ import static org.bytedeco.tensorrt.global.nvinfer.*; - @Namespace("nvinfer1::v_1_0") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) public class IPluginCreator extends IPluginCreatorInterface { static { Loader.load(); } diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreatorInterface.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreatorInterface.java index 995150e5f62..abd504a022a 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreatorInterface.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreatorInterface.java @@ -17,6 +17,7 @@ import static org.bytedeco.cuda.global.nvrtc.*; import static org.bytedeco.tensorrt.global.nvinfer.*; + // class IPluginResource @Namespace("nvinfer1::v_1_0") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) public class IPluginCreatorInterface extends IVersionedInterface { diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreatorV3One.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreatorV3One.java index ac753b0147c..8cdd654ad41 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreatorV3One.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginCreatorV3One.java @@ -18,6 +18,7 @@ import static org.bytedeco.tensorrt.global.nvinfer.*; + @Namespace("nvinfer1::v_1_0") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) public class 
IPluginCreatorV3One extends IPluginCreatorInterface { static { Loader.load(); } @@ -34,6 +35,7 @@ public class IPluginCreatorV3One extends IPluginCreatorInterface { //! //! //! + //! public native @ByVal @NoException(true) InterfaceInfo getInterfaceInfo(); /** @@ -47,6 +49,10 @@ public class IPluginCreatorV3One extends IPluginCreatorInterface { * runtime interface will be passed as fc. * * \note The returned plugin object must be in an initialized state + * + * \note If invoked by the user (e.g. with TensorRTPhase::kBUILD, to add to the network definition with + * addPluginV3()), it is the user's responsibility to delete the plugin object. If invoked by TensorRT (e.g. during + * engine deserialization), TensorRT will delete any objects it creates. * */ diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginResourceContext.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginResourceContext.java index 7df6b53b7e3..09c1d43e4b5 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginResourceContext.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginResourceContext.java @@ -19,6 +19,7 @@ import static org.bytedeco.tensorrt.global.nvinfer.*; + /** * \class IPluginResourceContext * diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginV3.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginV3.java index 0c1acc074cf..3233e2b6e7b 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginV3.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginV3.java @@ -18,6 +18,7 @@ import static org.bytedeco.tensorrt.global.nvinfer.*; + @Namespace("nvinfer1::v_1_0") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) public class IPluginV3 extends IVersionedInterface { static { Loader.load(); } diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginV3OneBuild.java 
b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginV3OneBuild.java index 6ce6988079f..10c762e260f 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginV3OneBuild.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IPluginV3OneBuild.java @@ -47,6 +47,7 @@ public class IPluginV3OneBuild extends IPluginCapability { //! //! //! + //! public native @ByVal @NoException(true) InterfaceInfo getInterfaceInfo(); /** @@ -65,6 +66,8 @@ public class IPluginV3OneBuild extends IPluginCapability { * @param nbInputs Number of input tensors. * @param out The output tensors attributes that are used for configuration. * @param nbOutputs Number of output tensors. + * + * @return 0 for success, else non-zero (which will cause engine termination, if invoked by TensorRT). * */ diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java index bfb8cd472a5..0a665bdb3e2 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java @@ -182,11 +182,13 @@ public class IRuntime extends INoCopy { * */ + //! //! //! 
public native ICudaEngine deserializeCudaEngine(@ByRef IStreamReader streamReader); + /** * \brief get the logger with which the runtime was created * diff --git a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java index 5d6d4177490..ac98c0e77c0 100644 --- a/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java +++ b/tensorrt/src/gen/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java @@ -37,10 +37,12 @@ public class VBuilder extends VRoot { public native @NoException(true) IErrorRecorder getErrorRecorder(); public native @NoException(true) void reset(); public native @Cast("bool") @NoException(true) boolean platformHasTf32(); - public native @NoException(true) IHostMemory buildSerializedNetwork(@ByRef INetworkDefinition network, @ByRef IBuilderConfig config); + public native @NoException(true) IHostMemory buildSerializedNetwork( + @ByRef INetworkDefinition network, @ByRef IBuilderConfig config); public native @Cast("bool") @NoException(true) boolean isNetworkSupported(@Const @ByRef INetworkDefinition network, @Const @ByRef IBuilderConfig config); public native @NoException(true) ILogger getLogger(); public native @Cast("bool") @NoException(true) boolean setMaxThreads(int maxThreads); public native @NoException(true) int getMaxThreads(); public native @ByRef @NoException(true) IPluginRegistry getPluginRegistry(); + public native @NoException(true) ICudaEngine buildEngineWithConfig(@ByRef INetworkDefinition network, @ByRef IBuilderConfig config); } diff --git a/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java b/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java index 631ed304ab6..cd38dcc533b 100644 --- a/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java +++ b/tensorrt/src/main/java/org/bytedeco/tensorrt/presets/nvinfer.java @@ -47,11 +47,12 @@ @Platform( value = {"linux-arm64", "linux-ppc64le", "linux-x86_64", "windows-x86_64"}, 
compiler = "cpp11", - include = {"NvInferVersion.h", "NvInferRuntimeBase.h", "NvInferRuntimePlugin.h", "NvInferRuntimeCommon.h", - "NvInferLegacyDims.h", "NvInferRuntime.h", "NvInfer.h", "NvInferImpl.h"/*, "NvUtils.h"*/}, + include = {"NvInferVersion.h", "NvInferRuntimeBase.h", "NvInferRuntimeCommon.h", + "NvInferLegacyDims.h", "NvInferRuntime.h", "NvInfer.h", "NvInferImpl.h", + "NvInferPluginBase.h", "NvInferRuntimePlugin.h", /*, "NvUtils.h"*/}, exclude = "NvInferRuntimeBase.h", link = "nvinfer@.10", - preload = "nvinfer_builder_resource@.10.5.0" + preload = "nvinfer_builder_resource@.10.6.0" ), @Platform( value = "linux-arm64", diff --git a/tritonserver/samples/unsupported/pom.xml b/tritonserver/samples/unsupported/pom.xml index b4b253724c5..4246bd5bcf5 100644 --- a/tritonserver/samples/unsupported/pom.xml +++ b/tritonserver/samples/unsupported/pom.xml @@ -18,7 +18,7 @@ org.bytedeco tensorrt-platform - 10.5-1.5.11-SNAPSHOT + 10.6-1.5.11-SNAPSHOT org.bytedeco