From c92f72ebebf5f4a1e63b726e6e5cec1a47250bb5 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Tue, 16 Jan 2024 11:59:03 -0500 Subject: [PATCH 01/15] Merge Linux Nuget GPU pipeline with zip-nuget (#19120) ### Description ### Motivation and Context --- .../c-api-noopenmp-packaging-pipelines.yml | 174 ++---------------- .../nuget-linux-cuda-packaging-stage.yml | 18 +- 2 files changed, 31 insertions(+), 161 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index f80b035582f18..2169a3ce1bb9e 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -83,6 +83,16 @@ resources: variables: - name: ReleaseVersionSuffix value: '' +- name: docker_base_image + ${{ if eq(parameters.CudaVersion, '11.8') }}: + value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8 + ${{ if eq(parameters.CudaVersion, '12.2') }}: + value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8 +- name: linux_trt_version + ${{ if eq(parameters.CudaVersion, '11.8') }}: + value: 8.6.1.6-1.cuda11.8 + ${{ if eq(parameters.CudaVersion, '12.2') }}: + value: 8.6.1.6-1.cuda12.0 stages: - stage: Setup @@ -189,64 +199,11 @@ stages: AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos' BuildVariant: 'default' -- stage: Linux_C_API_Packaging_GPU_x64 - dependsOn: [] - jobs: - - job: - workspace: - clean: all - timeoutInMinutes: 120 - pool: 'Onnxruntime-Linux-GPU' - variables: - - name: CUDA_VERSION_MAJOR - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: '11' - ${{ if eq(parameters.CudaVersion, '12.2') }}: - value: '12' - - name: CUDA_VERSION - value: ${{ parameters.CudaVersion }} - steps: - - template: templates/set-version-number-variables-step.yml - - template: templates/get-docker-image-steps.yml - parameters: - Dockerfile: tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile - Context: tools/ci_build/github/linux/docker/inference/x64/default/gpu - DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )" - Repository: onnxruntimecuda$(CUDA_VERSION_MAJOR)build - - - script: $(Build.SourcesDirectory)/tools/ci_build/github/linux/build_cuda_c_api_package.sh - workingDirectory: $(Build.SourcesDirectory) - displayName: 'Build and Test' - - - template: templates/java-api-artifacts-package-and-publish-steps-posix.yml - parameters: - arch: 'linux-x64' - buildConfig: 'Release' - artifactName: 'onnxruntime-java-linux-x64-cuda' - version: '$(OnnxRuntimeVersion)' - libraryName: 'libonnxruntime.so' - nativeLibraryName: 'libonnxruntime4j_jni.so' - - - template: templates/c-api-artifacts-package-and-publish-steps-posix.yml - parameters: - buildConfig: 'Release' - artifactName: 'onnxruntime-linux-x64-cuda-$(OnnxRuntimeVersion)' - artifactNameNoVersionString: 'onnxruntime-linux-x64-cuda' - libraryName: 'libonnxruntime.so.$(OnnxRuntimeVersion)' - - - template: templates/component-governance-component-detection-steps.yml - parameters: - condition: 'succeeded' - - template: templates/clean-agent-build-directory-step.yml - -- template: templates/linux-gpu-tensorrt-packaging-pipeline.yml +- template: stages/nuget-linux-cuda-packaging-stage.yml parameters: - artifactName: 'onnxruntime-linux-x64-tensorrt-$(OnnxRuntimeVersion)' - artifactNameNoVersionString: 'onnxruntime-linux-x64-tensorrt' - buildJava: true - buildJavaOption: '--build_java' - buildNodejs: true - buildNodejsOption: '--build_nodejs' + CudaVersion: ${{ 
parameters.CudaVersion }} + docker_base_image: ${{ variables.docker_base_image }} + linux_trt_version: ${{ variables.linux_trt_version }} #CUDA without tensorrt - template: templates/win-ci.yml @@ -527,109 +484,6 @@ stages: displayName: 'Clean Agent Directories' condition: always() -- stage: Linux_Packaging_combined_GPU - dependsOn: - - Linux_C_API_Packaging_GPU_x64 - - Linux_C_API_Packaging_GPU_TensorRT_x64 - condition: succeeded() - jobs: - - job: - workspace: - clean: all - pool: 'Onnxruntime-Linux-GPU' - - steps: - - checkout: self # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/onnxruntime - submodules: false - - checkout: onnxruntime-inference-examples # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/onnxruntime-inference-examples - submodules: false - - checkout: manylinux # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/manylinux - submodules: false - - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 - displayName: 'Clean Agent Directories' - condition: always() - - - script: | - set -e -x - cd $(Build.SourcesDirectory) - mv manylinux onnxruntime - ls - - - template: templates/with-container-registry-steps.yml - parameters: - Steps: - - script: | - tools/ci_build/get_docker_image.py \ - --dockerfile tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda \ - --context tools/ci_build/github/linux/docker \ - --docker-build-args "--network=host --build-arg BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8 --build-arg TRT_VERSION=8.6.1.6-1.cuda11.8 --build-arg BUILD_UID=$( id -u )" \ - --container-registry onnxruntimebuildcache \ - --multiple_repos \ - --repository onnxruntimecuda118xtrt86build - displayName: "Get onnxruntimecuda118xtrt86build image for tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda" - workingDirectory: $(Build.SourcesDirectory)/onnxruntime - ContainerRegistry: onnxruntimebuildcache - - - template: templates/set-version-number-variables-step.yml - parameters: - versionFileDirectory: '$(Build.SourcesDirectory)/onnxruntime' - workingDirectory: '$(Build.SourcesDirectory)/onnxruntime' - - task: DownloadPipelineArtifact@2 - displayName: 'Download Pipeline Artifact - Combined GPU' - inputs: - artifactName: 'onnxruntime-linux-x64-cuda' - targetPath: '$(Build.BinariesDirectory)/tgz-artifacts' - - - task: DownloadPipelineArtifact@2 - displayName: 'Download Pipeline Artifact - Combined GPU' - inputs: - artifactName: 'onnxruntime-linux-x64-tensorrt' - targetPath: '$(Build.BinariesDirectory)/tgz-artifacts' - - - task: ShellScript@2 - displayName: 'Shell Script' - inputs: - scriptPath: 'onnxruntime/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh' - args: '-a $(Build.BinariesDirectory)/tgz-artifacts' - workingDirectory: '$(Build.BinariesDirectory)/tgz-artifacts' - - - task: ArchiveFiles@2 - inputs: - rootFolderOrFile: '$(Build.BinariesDirectory)/tgz-artifacts/onnxruntime-linux-x64-gpu' - includeRootFolder: false - archiveType: 'tar' # Options: zip, 7z, tar, wim - tarCompression: 'gz' - archiveFile: '$(Build.ArtifactStagingDirectory)/onnxruntime-linux-x64-gpu-$(OnnxRuntimeVersion).tgz' - replaceExistingArchive: true - - - template: templates/validate-package.yml - parameters: - PackageType: 'tarball' - PackagePath: '$(Build.ArtifactStagingDirectory)' - PackageName: 'onnxruntime-linux-x64-gpu-$(OnnxRuntimeVersion).tgz' - ScriptPath: '$(Build.SourcesDirectory)/onnxruntime/tools/nuget/validate_package.py' - 
PlatformsSupported: 'linux-x64'
-      VerifyNugetSigning: false
-      workingDirectory: '$(Build.ArtifactStagingDirectory)'
-
-
-  - task: CmdLine@2
-    displayName: 'Test C API application for GPU package'
-    inputs:
-      script: |
-        docker run --gpus all -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e NVIDIA_VISIBLE_DEVICES=all --rm --volume /data/models:/data/models --volume $(Build.SourcesDirectory):/src_dir \
-        --volume $(Build.ArtifactStagingDirectory):/artifact_src -e NIGHTLY_BUILD onnxruntimecuda118xtrt86build \
-        /src_dir/onnxruntime-inference-examples/c_cxx/squeezenet/run_capi_application.sh -o /src_dir/onnxruntime -p /artifact_src/onnxruntime-linux-x64-gpu-$(OnnxRuntimeVersion).tgz -w /src_dir/onnxruntime-inference-examples/c_cxx/squeezenet
-      workingDirectory: '$(Build.ArtifactStagingDirectory)'
-
-  - task: PublishPipelineArtifact@1
-    inputs:
-      targetPath: '$(Build.ArtifactStagingDirectory)/onnxruntime-linux-x64-gpu-$(OnnxRuntimeVersion).tgz'
-      artifactName: 'onnxruntime-linux-x64-gpu'
-  - template: templates/component-governance-component-detection-steps.yml
-    parameters :
-      condition : 'succeeded'
-
- stage: Windows_Packaging_combined_GPU
   dependsOn:

diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml
index 48a6e0e8529e6..dbbc9ef27e513 100644
--- a/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml
+++ b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml
@@ -40,7 +40,16 @@ stages:
   - script: $(Build.SourcesDirectory)/tools/ci_build/github/linux/build_cuda_c_api_package.sh
     workingDirectory: $(Build.SourcesDirectory)
     displayName: 'Build and Test'
-
+# We only support Maven package for CUDA 11.8
+  - ${{ if eq(parameters.CudaVersion, '11.8') }}:
+    - template: ../templates/java-api-artifacts-package-and-publish-steps-posix.yml
+      parameters:
+        arch: 'linux-x64'
+        buildConfig: 'Release'
+        artifactName: 'onnxruntime-java-linux-x64-cuda'
+        version: '$(OnnxRuntimeVersion)'
+        libraryName: 'libonnxruntime.so'
+        nativeLibraryName: 'libonnxruntime4j_jni.so'
   - template: ../templates/c-api-artifacts-package-and-publish-steps-posix.yml
     parameters:
       buildConfig: 'Release'
@@ -82,6 +91,10 @@ stages:
     - checkout: manylinux # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/manylinux
       submodules: false
 
+    - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
+      displayName: 'Clean Agent Directories'
+      condition: always()
+
     - script: |
         set -e -x
         cd $(Build.SourcesDirectory)
@@ -159,3 +172,6 @@ stages:
       inputs:
         targetPath: '$(Build.ArtifactStagingDirectory)/onnxruntime-linux-x64-gpu-$(OnnxRuntimeVersion).tgz'
        artifactName: 'onnxruntime-linux-x64-gpu'
+  - template: ../templates/component-governance-component-detection-steps.yml
+    parameters:
+      condition: 'succeeded'
\ No newline at end of file

From e2e488d6f8bcd14f40e9e2c8e65f310ce9c0e872 Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Tue, 16 Jan 2024 09:18:35 -0800
Subject: [PATCH 02/15] Revert "iOS packaging pipeline stability" (#19135)

Reverts microsoft/onnxruntime#19097 because it broke the Android CI
pipeline.
--- .../external/onnxruntime_external_deps.cmake | 74 +++++++++---------- .../mac-ios-packaging-pipeline.yml | 2 +- .../stages/mac-ios-packaging-build-stage.yml | 7 +- 3 files changed, 42 insertions(+), 41 deletions(-) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index c79bb87fd7f5d..78f63227c8392 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -108,14 +108,41 @@ FetchContent_Declare( ) # Download a protoc binary from Internet if needed -if(NOT ONNX_CUSTOM_PROTOC_EXECUTABLE) +if(CMAKE_CROSSCOMPILING AND NOT ONNX_CUSTOM_PROTOC_EXECUTABLE) # This part of code is only for users' convenience. The code couldn't handle all cases. Users always can manually # download protoc from Protobuf's Github release page and pass the local path to the ONNX_CUSTOM_PROTOC_EXECUTABLE # variable. - if (APPLE) - # Using CMAKE_CROSSCOMPILING is not recommended for Apple target devices. - # https://cmake.org/cmake/help/v3.26/variable/CMAKE_CROSSCOMPILING.html - # To keep it simple, just download and use the universal protoc binary for Apple builds. + message("CMAKE_HOST_SYSTEM_NAME: ${CMAKE_HOST_SYSTEM_NAME}") + if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "AMD64") + FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_win64} URL_HASH SHA1=${DEP_SHA1_protoc_win64}) + FetchContent_Populate(protoc_binary) + elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86") + FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_win32} URL_HASH SHA1=${DEP_SHA1_protoc_win32}) + FetchContent_Populate(protoc_binary) + endif() + if(protoc_binary_SOURCE_DIR) + message("Use prebuilt protoc") + set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${protoc_binary_SOURCE_DIR}/bin/protoc.exe) + set(PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE}) + endif() + elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux") + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64)$") + FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_linux_x64} URL_HASH SHA1=${DEP_SHA1_protoc_linux_x64}) + FetchContent_Populate(protoc_binary) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86?)$") + FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_linux_x86} URL_HASH SHA1=${DEP_SHA1_protoc_linux_x86}) + FetchContent_Populate(protoc_binary) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64.*") + FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_linux_aarch64} URL_HASH SHA1=${DEP_SHA1_protoc_linux_aarch64}) + FetchContent_Populate(protoc_binary) + endif() + if(protoc_binary_SOURCE_DIR) + message("Use prebuilt protoc") + set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${protoc_binary_SOURCE_DIR}/bin/protoc) + set(PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE}) + endif() + elseif ((CMAKE_SYSTEM_NAME STREQUAL "Emscripten" OR CMAKE_SYSTEM_NAME STREQUAL "Android" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin") FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_mac_universal} URL_HASH SHA1=${DEP_SHA1_protoc_mac_universal}) FetchContent_Populate(protoc_binary) if(protoc_binary_SOURCE_DIR) @@ -123,38 +150,6 @@ if(NOT ONNX_CUSTOM_PROTOC_EXECUTABLE) set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${protoc_binary_SOURCE_DIR}/bin/protoc) set(PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE}) endif() - elseif(CMAKE_CROSSCOMPILING) - message("CMAKE_HOST_SYSTEM_NAME: ${CMAKE_HOST_SYSTEM_NAME}") - if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") - if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "AMD64") - 
FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_win64} URL_HASH SHA1=${DEP_SHA1_protoc_win64}) - FetchContent_Populate(protoc_binary) - elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86") - FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_win32} URL_HASH SHA1=${DEP_SHA1_protoc_win32}) - FetchContent_Populate(protoc_binary) - endif() - if(protoc_binary_SOURCE_DIR) - message("Use prebuilt protoc") - set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${protoc_binary_SOURCE_DIR}/bin/protoc.exe) - set(PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE}) - endif() - elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux") - if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64)$") - FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_linux_x64} URL_HASH SHA1=${DEP_SHA1_protoc_linux_x64}) - FetchContent_Populate(protoc_binary) - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86?)$") - FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_linux_x86} URL_HASH SHA1=${DEP_SHA1_protoc_linux_x86}) - FetchContent_Populate(protoc_binary) - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64.*") - FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_linux_aarch64} URL_HASH SHA1=${DEP_SHA1_protoc_linux_aarch64}) - FetchContent_Populate(protoc_binary) - endif() - if(protoc_binary_SOURCE_DIR) - message("Use prebuilt protoc") - set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${protoc_binary_SOURCE_DIR}/bin/protoc) - set(PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE}) - endif() - endif() endif() endif() @@ -189,9 +184,9 @@ FetchContent_Declare( ) set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests" FORCE) -#TODO: we'd better to turn the following option off. However, it will cause +#TODO: we'd better to turn the following option off. However, it will cause # ".\build.bat --config Debug --parallel --skip_submodule_sync --update" fail with an error message: -# install(EXPORT "ONNXTargets" ...) includes target "onnx_proto" which requires target "libprotobuf-lite" that is +# install(EXPORT "ONNXTargets" ...) includes target "onnx_proto" which requires target "libprotobuf-lite" that is # not in any export set. #set(protobuf_INSTALL OFF CACHE BOOL "Install protobuf binaries and files" FORCE) set(protobuf_USE_EXTERNAL_GTEST ON CACHE BOOL "" FORCE) @@ -567,3 +562,4 @@ endif() FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR) FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR) + diff --git a/tools/ci_build/github/azure-pipelines/mac-ios-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/mac-ios-packaging-pipeline.yml index 34a51649fc384..5fd15b64e03b6 100644 --- a/tools/ci_build/github/azure-pipelines/mac-ios-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/mac-ios-packaging-pipeline.yml @@ -53,7 +53,7 @@ stages: displayName: "Set common variables" pool: - vmImage: "macOS-12" # macOS-13 seems less stable. macOS-12 will work for this job. 
+ vmImage: "macOS-13" timeoutInMinutes: 5 diff --git a/tools/ci_build/github/azure-pipelines/templates/stages/mac-ios-packaging-build-stage.yml b/tools/ci_build/github/azure-pipelines/templates/stages/mac-ios-packaging-build-stage.yml index ed32c5d0e15be..d1dff0769e25f 100644 --- a/tools/ci_build/github/azure-pipelines/templates/stages/mac-ios-packaging-build-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/stages/mac-ios-packaging-build-stage.yml @@ -78,6 +78,10 @@ stages: pip install -r tools/ci_build/github/apple/ios_packaging.requirements.txt displayName: "Install Python requirements" + - script: | + $(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/inference/x64/python/cpu/scripts/install_protobuf.sh -p $(Build.BinariesDirectory)/protobuf_install -d $(Build.SourcesDirectory)/cmake/deps.txt + displayName: "Build Host Protoc" + # create and test mobile pods - script: | python tools/ci_build/github/apple/build_and_assemble_apple_pods.py \ @@ -87,7 +91,8 @@ stages: --test \ --variant ${{ parameters.packageVariant }} \ --build-settings-file "${{ variables.buildSettingsFile }}" \ - ${{ variables.optionalIncludeOpsByConfigOption }} + ${{ variables.optionalIncludeOpsByConfigOption }} \ + -b="--path_to_protoc_exe=$(Build.BinariesDirectory)/protobuf_install/bin/protoc" displayName: "Build macOS/iOS framework and assemble pod package files" - script: | From 80f274ca6f2f4572d827edd6dc7f736d7a8c036a Mon Sep 17 00:00:00 2001 From: Patrice Vignola Date: Tue, 16 Jan 2024 09:42:59 -0800 Subject: [PATCH 03/15] Fix SkipLayerNormalization shape inference (#18724) SkipLayerNorm has more than one input, so `propagateShapeAndTypeFromFirstInput` is not enough. --- .../core/graph/contrib_ops/bert_defs.cc | 4 +- .../contrib_ops/shape_inference_functions.cc | 39 +++++++++++++++++++ .../contrib_ops/shape_inference_functions.h | 3 +- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/graph/contrib_ops/bert_defs.cc b/onnxruntime/core/graph/contrib_ops/bert_defs.cc index df8d0a59cb033..0317ffcfb0e31 100644 --- a/onnxruntime/core/graph/contrib_ops/bert_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/bert_defs.cc @@ -1285,7 +1285,7 @@ ONNX_MS_OPERATOR_SET_SCHEMA( .Output(3, "input_skip_bias_sum", "Sum of the input and skip inputs (and bias if it exists) with shape (batch_size, sequence_length, hidden_size).", "T", OpSchema::Optional) .TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float or half tensors.") .TypeConstraint("U", {"tensor(float)"}, "Constrain mean and inv_std_var to float tensors.") - .TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput)); + .TypeAndShapeInferenceFunction(SkipLayerNormalizationShapeInference)); ONNX_MS_OPERATOR_SET_SCHEMA( SkipSimplifiedLayerNormalization, 1, @@ -1334,7 +1334,7 @@ ONNX_MS_OPERATOR_SET_SCHEMA( OpSchema::Optional) .TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float or half tensors.") .TypeConstraint("U", {"tensor(float)"}, "Constrain mean and inv_std_var to float tensors.") - .TypeAndShapeInferenceFunction(ONNX_NAMESPACE::propagateShapeAndTypeFromFirstInput)); + .TypeAndShapeInferenceFunction(SkipLayerNormalizationShapeInference)); constexpr const char* NGramRepeatBlock_ver1_doc = R"DOC( Enforce no repetition of n-grams. Scores are set to `-inf` for tokens that form a repeated n-gram if added to the back of the input_ids. 
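For intuition, here is a minimal NumPy sketch (illustrative only, not part of the patch; simplified math and made-up shapes) of the SkipLayerNormalization semantics that the corrected inference function below mirrors. The key point is the output shapes: `mean` and `inv_std_var` keep the input shape with the last axis reduced to 1, which propagating only the first input's type and shape could not express.

import numpy as np

def skip_layer_norm_reference(x, skip, gamma, bias=None, eps=1e-12):
    # Sum the main input with the skip connection (plus bias when present).
    y = x + skip if bias is None else x + skip + bias
    input_skip_bias_sum = y
    # Statistics are reduced over the last (hidden) axis, kept as size 1.
    mean = y.mean(axis=-1, keepdims=True)
    inv_std_var = 1.0 / np.sqrt(y.var(axis=-1, keepdims=True) + eps)
    output = (y - mean) * inv_std_var * gamma
    return output, mean, inv_std_var, input_skip_bias_sum

x = np.random.rand(2, 4, 8).astype(np.float32)  # (batch, sequence, hidden)
out, mean, inv_std, s = skip_layer_norm_reference(x, x, np.ones(8, np.float32))
print(out.shape, mean.shape, inv_std.shape, s.shape)  # (2, 4, 8) (2, 4, 1) (2, 4, 1) (2, 4, 8)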
diff --git a/onnxruntime/core/graph/contrib_ops/shape_inference_functions.cc b/onnxruntime/core/graph/contrib_ops/shape_inference_functions.cc
index eeef20e9dff5e..8b1812f62be25 100644
--- a/onnxruntime/core/graph/contrib_ops/shape_inference_functions.cc
+++ b/onnxruntime/core/graph/contrib_ops/shape_inference_functions.cc
@@ -114,6 +114,45 @@ void EmbedLayerNormalizationShapeInference(::ONNX_NAMESPACE::InferenceContext& c
   }
 }
 
+void SkipLayerNormalizationShapeInference(::ONNX_NAMESPACE::InferenceContext& ctx) {
+  propagateShapeAndTypeFromFirstInput(ctx);
+
+  auto stash_type = ONNX_NAMESPACE::TensorProto_DataType_FLOAT;
+  if (ctx.getNumOutputs() > 1) {
+    auto output_type = ctx.getOutputType(1);
+    output_type->mutable_tensor_type()->set_elem_type(static_cast<int32_t>(stash_type));
+  }
+  if (ctx.getNumOutputs() > 2) {
+    auto output_type = ctx.getOutputType(2);
+    output_type->mutable_tensor_type()->set_elem_type(static_cast<int32_t>(stash_type));
+  }
+  if (ctx.getNumOutputs() > 3) {
+    propagateElemTypeFromInputToOutput(ctx, 0, 3);
+  }
+  if (!hasNInputShapes(ctx, 1)) {
+    return;
+  }
+  auto& input_shape = ctx.getInputType(0)->tensor_type().shape();
+  int64_t input_ndim = input_shape.dim_size();
+  int axis = static_cast<int>(input_ndim - 1);
+
+  if (ctx.getNumOutputs() > 1) {
+    auto mean_shape = ctx.getOutputType(1)->mutable_tensor_type()->mutable_shape();
+    mean_shape->CopyFrom(input_shape);
+    mean_shape->mutable_dim(axis)->set_dim_value(1);
+  }
+
+  if (ctx.getNumOutputs() > 2) {
+    auto inv_std_dev_shape = ctx.getOutputType(2)->mutable_tensor_type()->mutable_shape();
+    inv_std_dev_shape->CopyFrom(input_shape);
+    inv_std_dev_shape->mutable_dim(axis)->set_dim_value(1);
+  }
+
+  if (ctx.getNumOutputs() > 3) {
+    propagateShapeFromInputToOutput(ctx, 0, 3);
+  }
+}
+
 // Shape inference for Attention and QAttention
 void AttentionTypeAndShapeInference(ONNX_NAMESPACE::InferenceContext& ctx, int past_input_index) {
   // Input 0, 1, 2 are input, weights and bias.
diff --git a/onnxruntime/core/graph/contrib_ops/shape_inference_functions.h b/onnxruntime/core/graph/contrib_ops/shape_inference_functions.h
index 93cf5b304f653..6eb06af15309c 100644
--- a/onnxruntime/core/graph/contrib_ops/shape_inference_functions.h
+++ b/onnxruntime/core/graph/contrib_ops/shape_inference_functions.h
@@ -13,5 +13,6 @@ namespace onnxruntime {
 namespace contrib {
 void AttentionTypeAndShapeInference(ONNX_NAMESPACE::InferenceContext& ctx, int past_input_index);
 void EmbedLayerNormalizationShapeInference(::ONNX_NAMESPACE::InferenceContext& ctx);
+void SkipLayerNormalizationShapeInference(::ONNX_NAMESPACE::InferenceContext& ctx);
 }  // namespace contrib
-}  // namespace onnxruntime
\ No newline at end of file
+}  // namespace onnxruntime

From 8e272b9cac70a11c472fb002af755213a4dabf66 Mon Sep 17 00:00:00 2001
From: Jian Chen
Date: Tue, 16 Jan 2024 16:53:15 -0500
Subject: [PATCH 04/15] Update build.py to remove unused functions and update
 python to 3.8 (#19164)

### Description
Remove the unused helpers `is_ubuntu_1604()`, `is_docker()`, and
`is_cross_compiling_on_apple()` from build.py, and raise the minimum
required Python version from 3.7 to 3.8.

### Motivation and Context

---
 tools/ci_build/build.py | 32 +-------------------------------
 1 file changed, 1 insertion(+), 31 deletions(-)

diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 0da4adb51767d..1a6262edf45c9 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -56,7 +56,7 @@ def __init__(self, message):
 
 
 def _check_python_version():
-    required_minor_version = 7
+    required_minor_version = 8
     if (sys.version_info.major, sys.version_info.minor) < (3, required_minor_version):
         raise UsageError(
             f"Invalid Python version. At least Python 3.{required_minor_version} is required. "
@@ -786,11 +786,6 @@ def get_linux_distro():
         return "", ""
 
 
-def is_ubuntu_1604():
-    dist, ver = get_linux_distro()
-    return dist == "Ubuntu" and ver.startswith("16.04")
-
-
 def get_config_build_dir(build_dir, config):
     # build directory per configuration
     return os.path.join(build_dir, config)
@@ -844,15 +839,6 @@ def update_submodules(source_dir):
     run_subprocess(["git", "submodule", "update", "--init", "--recursive"], cwd=source_dir)
 
 
-def is_docker():
-    path = "/proc/self/cgroup"
-    return (
-        os.path.exists("/.dockerenv")
-        or os.path.isfile(path)
-        and any("docker" in line for line in open(path))  # noqa: SIM115
-    )
-
-
 def install_python_deps(numpy_version=""):
     dep_packages = ["setuptools", "wheel", "pytest"]
     dep_packages.append(f"numpy=={numpy_version}" if numpy_version else "numpy>=1.16.6")
@@ -2401,16 +2387,6 @@ def run_csharp_tests(source_dir, build_dir, use_cuda, use_openvino, use_tensorrt
         run_subprocess(cmd_args, cwd=csharp_source_dir)
 
 
-def is_cross_compiling_on_apple(args):
-    if not is_macOS():
-        return False
-    if args.ios:
-        return True
-    if args.osx_arch != platform.machine():
-        return True
-    return False
-
-
 def generate_documentation(source_dir, build_dir, configs, validate):
     # Randomly choose one build config
     config = next(iter(configs))
@@ -2725,12 +2701,6 @@ def main():
             log.info("Activating emsdk...")
             run_subprocess([emsdk_file, "activate", emsdk_version], cwd=emsdk_dir)
 
-        if is_ubuntu_1604():
-            if args.arm or args.arm64:
-                raise BuildError("Only Windows ARM(64) cross-compiled builds supported currently through this script")
-            if not is_docker() and not args.use_acl and not args.use_armnn:
-                install_python_deps()
-
         if args.enable_pybind and is_windows():
             install_python_deps(args.numpy_version)
 

From c935c8fbd2e463a3e0153145140a8efd780dfabc Mon Sep 17 00:00:00 2001
From: moyo1997 <54333118+moyo1997@users.noreply.github.com>
Date: Tue, 16 Jan 2024 16:24:37 -0800
Subject: [PATCH 05/15] remove unnecessary environment variable (#19166)

remove unnecessary environment variable when building as arm64x
---
 build_arm64x.bat | 1 -
 1 file changed, 1 deletion(-)

diff --git a/build_arm64x.bat b/build_arm64x.bat
index fbcdd373086a9..1ed268ae94a43 100644
--- a/build_arm64x.bat
+++ b/build_arm64x.bat
@@ -5,7 +5,6 @@
 setlocal
 
 set PATH=C:\Program Files\Git\usr\bin;%PATH%
-set LINK_REPRO_NAME=/mylink.rsp
 
 rem Requires a Python install to be available in your PATH
 python "%~dp0\tools\ci_build\build.py" --arm64 --buildasx --build_dir "%~dp0\build\arm64-x" %*

From e61861b0a121bca1d60e5d4a3722e52b6820c430 Mon Sep 17 00:00:00 2001
From: Hector Li
Date: Tue, 16 Jan 2024 16:36:28 -0800
Subject: [PATCH 06/15] Clean up generated files in QNN UTs (#19127)

### Description
Clean up generated files in QNN UTs
---
 onnxruntime/test/providers/qnn/simple_op_htp_test.cc | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
index 8ff65c08e8633..c4244fe532456 100644
--- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
@@ -815,7 +815,8 @@ TEST_F(QnnHTPBackendTests, ContextBinaryCacheNonEmbedModeTest) {
   // Check the Onnx skeleton file is generated
   EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str()));
   // Check the Qnn context cache binary file is generated
-  EXPECT_TRUE(std::filesystem::exists("qnn_context_cache_non_embed.onnx_QNNExecutionProvider_QNN_8283143575221199085_1_0.bin"));
+  std::string qnn_ctx_bin = "qnn_context_cache_non_embed.onnx_QNNExecutionProvider_QNN_8283143575221199085_1_0.bin";
+  EXPECT_TRUE(std::filesystem::exists(qnn_ctx_bin));
 
   // 2nd run loads and run from QDQ model + Onnx skeleton file + Qnn context cache binary file
   TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def}, {}, {}),
@@ -837,6 +838,10 @@ TEST_F(QnnHTPBackendTests, ContextBinaryCacheNonEmbedModeTest) {
                        QDQTolerance(),
                        logging::Severity::kERROR,
                        context_binary_file);
+
+  // Clean up
+  ASSERT_EQ(std::remove(context_binary_file.c_str()), 0);
+  ASSERT_EQ(std::remove(qnn_ctx_bin.c_str()), 0);
 }
 
 // Run QDQ model on HTP 2 times
@@ -898,6 +903,9 @@ TEST_F(QnnHTPBackendTests, ContextBinaryCache_InvalidGraph) {
   ASSERT_STATUS_OK(session_object.Load(qnn_ctx_model_data.data(), static_cast<int>(qnn_ctx_model_data.size())));
   // Verify the return status with code INVALID_GRAPH
   ASSERT_TRUE(session_object.Initialize().Code() == common::StatusCode::INVALID_GRAPH);
+
+  // Clean up
+  ASSERT_EQ(std::remove(context_binary_file.c_str()), 0);
 }
 
 // Run QDQ model on HTP with 2 inputs
@@ -955,6 +963,8 @@ TEST_F(QnnHTPBackendTests, ContextBinary2InputsTest) {
                        QDQTolerance(),
                        logging::Severity::kERROR,
                        context_binary_file);
+  // Clean up
+  ASSERT_EQ(std::remove(context_binary_file.c_str()), 0);
 }
 
 TEST_F(QnnHTPBackendTests, QuantAccuracyTest) {

From 81d363045ba273b16a3ec654c53a15217a2d2a36 Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Tue, 16 Jan 2024 17:25:18 -0800
Subject: [PATCH 07/15] Upgrade Ubuntu machine pool from 20.04 to 22.04 (#19117)

### Description
Upgrade Ubuntu machine pool from 20.04 to 22.04
---
 .../build-perf-test-binaries-pipeline.yml     |  2 +-
 .../c-api-noopenmp-packaging-pipelines.yml    |  2 +-
 ...lean-build-docker-image-cache-pipeline.yml | 10 +-------
 .../cuda-packaging-pipeline.yml               |  2 +-
 .../azure-pipelines/linux-ci-pipeline.yml     |  4 ++--
 .../linux-cpu-aten-pipeline.yml               |  2 +-
 .../linux-cpu-eager-pipeline.yml              |  2 +-
 .../azure-pipelines/linux-gpu-ci-pipeline.yml |  2 +-
 .../linux-migraphx-ci-pipeline.yml            |  2 +-
 .../npm-packaging-pipeline.yml                |  4 ++--
 .../nuget/templates/test_linux.yml            |  2 +-
 .../orttraining-linux-ci-pipeline.yml         |  2 +-
 .../orttraining-pai-ci-pipeline.yml           |  4 ++--
 .../orttraining-py-packaging-pipeline-cpu.yml |  2 +-
 .../azure-pipelines/post-merge-jobs.yml       |  6 ++---
 .../py-package-test-pipeline.yml              |  2 +-
 .../stages/py-cuda-packaging-stage.yml        |  2 +-
 .../stages/py-cuda-publishing-stage.yml       |  2 +-
 .../templates/android-java-api-aar.yml        |  2 +-
 .../templates/build-linux-wasm-step.yml       | 22 +++++++++----------
 .../azure-pipelines/templates/c-api-cpu.yml   |  4 ++--
 .../templates/c-api-linux-cpu.yml             |  2 +-
 .../azure-pipelines/templates/linux-ci.yml    |  2 +-
 .../linux-cpu-packaging-pipeline.yml          |  2 +-
 .../templates/linux-wasm-ci.yml               |  2 +-
 ...device-training-cpu-packaging-pipeline.yml |  2 +-
 .../py-packaging-selectable-stage.yml         |  2 +-
 .../templates/py-packaging-stage.yml          |  4 ++--
 .../github/azure-pipelines/templates/rocm.yml |  2 +-
 .../azure-pipelines/web-ci-pipeline.yml       |  2 +-
 .../linux/build_linux_python_package.sh       |  6 ++---
 .../ci_build/github/linux/run_python_tests.sh |  2 +-
 32 files changed, 50 insertions(+), 60 deletions(-)

diff --git a/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml b/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml
index 3ddc167bc0a61..d37e9bdc5da4c 100644
--- 
a/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml @@ -28,7 +28,7 @@ stages: artifactName: 'onnxruntime-android-full-aar' job_name_suffix: 'Full' publish_executables: '1' - pool_name: 'onnxruntime-Ubuntu2004-AMD-CPU' + pool_name: 'onnxruntime-Ubuntu2204-AMD-CPU' # build Python packages # Linux GPU only diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index 2169a3ce1bb9e..3803333bd880a 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -246,7 +246,7 @@ stages: workspace: clean: all timeoutInMinutes: 120 - pool: onnxruntime-Ubuntu2004-AMD-CPU + pool: onnxruntime-Ubuntu2204-AMD-CPU variables: RocmVersion: '5.6' steps: diff --git a/tools/ci_build/github/azure-pipelines/clean-build-docker-image-cache-pipeline.yml b/tools/ci_build/github/azure-pipelines/clean-build-docker-image-cache-pipeline.yml index 24086b6166fe4..43e668eef8d00 100644 --- a/tools/ci_build/github/azure-pipelines/clean-build-docker-image-cache-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/clean-build-docker-image-cache-pipeline.yml @@ -19,8 +19,7 @@ variables: jobs: - job: Clean_Build_Docker_Image_Cache - pool: - vmImage: 'ubuntu-20.04' + pool: onnxruntime-Ubuntu2204-AMD-CPU timeoutInMinutes: 30 @@ -29,13 +28,6 @@ jobs: submodules: false fetchDepth: 1 - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.9' - addToPath: true - architecture: 'x64' - displayName: "Use Python 3.9" - - task: AzureCLI@2 inputs: azureSubscription: 'AIInfraBuild' diff --git a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml index df7b5f59d28fc..1d2ba88652f48 100644 --- a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml @@ -126,7 +126,7 @@ stages: BaseImage: 'registry.access.redhat.com/ubi8/ubi' OnnxruntimeArch: 'x64' OnnxruntimeNodejsBindingArch: 'x64' - PoolName: 'onnxruntime-Ubuntu2004-AMD-CPU' + PoolName: 'onnxruntime-Ubuntu2204-AMD-CPU' PackageJava: false PackageNodeJS: false # Nuget Packaging diff --git a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml index 07f672c75d029..cff7c96aa9253 100644 --- a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml @@ -46,7 +46,7 @@ stages: skipComponentGovernanceDetection: true ORT_CACHE_DIR: $(Agent.TempDirectory)/ort_ccache TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)] - pool: onnxruntime-Ubuntu2004-AMD-CPU + pool: onnxruntime-Ubuntu2204-AMD-CPU steps: - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 displayName: 'Clean Agent Directories' @@ -123,7 +123,7 @@ stages: skipComponentGovernanceDetection: true ORT_CACHE_DIR: $(Agent.TempDirectory)/ort_ccache TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)] - pool: onnxruntime-Ubuntu2004-AMD-CPU + pool: onnxruntime-Ubuntu2204-AMD-CPU steps: - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 displayName: 'Clean Agent Directories' diff --git a/tools/ci_build/github/azure-pipelines/linux-cpu-aten-pipeline.yml 
b/tools/ci_build/github/azure-pipelines/linux-cpu-aten-pipeline.yml index 146186e9eeaf5..090ce97296687 100644 --- a/tools/ci_build/github/azure-pipelines/linux-cpu-aten-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-cpu-aten-pipeline.yml @@ -43,7 +43,7 @@ jobs: variables: CCACHE_DIR: $(Agent.TempDirectory)/ccache TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)] - pool: onnxruntime-Ubuntu2004-AMD-CPU + pool: onnxruntime-Ubuntu2204-AMD-CPU steps: - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 displayName: 'Clean Agent Directories' diff --git a/tools/ci_build/github/azure-pipelines/linux-cpu-eager-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-cpu-eager-pipeline.yml index a5c08e95b7efc..d3d13cc5344da 100644 --- a/tools/ci_build/github/azure-pipelines/linux-cpu-eager-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-cpu-eager-pipeline.yml @@ -51,7 +51,7 @@ jobs: timeoutInMinutes: 120 workspace: clean: all - pool: onnxruntime-Ubuntu2004-AMD-CPU + pool: onnxruntime-Ubuntu2204-AMD-CPU steps: - checkout: self clean: true diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml index 0993a81a02249..5bc8c3603ee92 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml @@ -64,7 +64,7 @@ jobs: CCACHE_DIR: $(Pipeline.Workspace)/ccache workspace: clean: all - pool: onnxruntime-Ubuntu2004-AMD-CPU + pool: onnxruntime-Ubuntu2204-AMD-CPU steps: - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 displayName: 'Clean Agent Directories' diff --git a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml index f7571a3b7eab6..9cf7a3fb42397 100644 --- a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml @@ -46,7 +46,7 @@ jobs: TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)] workspace: clean: all - pool: onnxruntime-Ubuntu2004-AMD-CPU + pool: onnxruntime-Ubuntu2204-AMD-CPU timeoutInMinutes: 120 steps: diff --git a/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml index 7f73da23b5eb1..21fc205c72e89 100644 --- a/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/npm-packaging-pipeline.yml @@ -41,7 +41,7 @@ stages: parameters: NpmPackagingMode: ${{ variables.NpmPackagingMode }} IsReleasePipeline: true - PoolName: 'onnxruntime-Ubuntu2004-AMD-CPU' + PoolName: 'onnxruntime-Ubuntu2204-AMD-CPU' PackageName: 'onnxruntime-web' ExtraBuildArgs: '' UseWebPoolName: true @@ -54,7 +54,7 @@ stages: parameters: NpmPackagingMode: ${{ variables.NpmPackagingMode }} BuildConfig: 'Release' - PoolName: 'onnxruntime-Ubuntu2004-AMD-CPU' + PoolName: 'onnxruntime-Ubuntu2204-AMD-CPU' PackageName: 'onnxruntime-react-native' BuildAndroidAARStageDependsOn: 'Precheck_and_extract_commit' diff --git a/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml b/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml index f44106c145228..2567bec9fdfc2 100644 --- a/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml +++ b/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml @@ -1,5 +1,5 @@ 
parameters: - AgentPool: 'onnxruntime-Ubuntu2004-AMD-CPU' + AgentPool: 'onnxruntime-Ubuntu2204-AMD-CPU' ArtifactSuffix: '' NugetPackageName : '' StageSuffix: 'CPU' diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml index 018672e0b2dea..26fd5e1ec0b5d 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml @@ -44,7 +44,7 @@ jobs: skipComponentGovernanceDetection: true CCACHE_DIR: $(Pipeline.Workspace)/ccache TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)] - pool: onnxruntime-Ubuntu-2004-Training-CPU + pool: onnxruntime-Ubuntu-2204-Training-CPU steps: - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 displayName: 'Clean Agent Directories' diff --git a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml index a53f91fb317cb..71b224b65964f 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml @@ -37,7 +37,7 @@ jobs: TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)] workspace: clean: all - pool: onnxruntime-Ubuntu2004-AMD-CPU + pool: onnxruntime-Ubuntu2204-AMD-CPU timeoutInMinutes: 120 steps: @@ -132,7 +132,7 @@ jobs: TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)] workspace: clean: all - pool: onnxruntime-Ubuntu2004-AMD-CPU + pool: onnxruntime-Ubuntu2204-AMD-CPU timeoutInMinutes: 120 steps: diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml index 817ace0571837..a44a8c215939f 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml @@ -16,7 +16,7 @@ stages: timeoutInMinutes: 180 workspace: clean: all - pool: onnxruntime-Ubuntu2004-AMD-CPU + pool: onnxruntime-Ubuntu2204-AMD-CPU strategy: matrix: diff --git a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml index 5ee39876733e2..3ec5400dacc65 100644 --- a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml +++ b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml @@ -4,7 +4,7 @@ stages: parameters: NpmPackagingMode: 'dev' IsReleasePipeline: true - PoolName: 'onnxruntime-Ubuntu2004-AMD-CPU' + PoolName: 'onnxruntime-Ubuntu2204-AMD-CPU' BuildStaticLib: true ExtraBuildArgs: '' UseWebPoolName: true @@ -367,7 +367,7 @@ stages: timeoutInMinutes: 150 variables: skipComponentGovernanceDetection: true - pool: 'onnxruntime-Ubuntu2004-AMD-CPU' + pool: 'onnxruntime-Ubuntu2204-AMD-CPU' steps: - template: templates/set-version-number-variables-step.yml @@ -413,7 +413,7 @@ stages: - job: AndroidCustomBuildScript workspace: clean: all - pool: 'onnxruntime-Ubuntu2004-AMD-CPU' + pool: 'onnxruntime-Ubuntu2204-AMD-CPU' variables: dockerImageTag: onnxruntime-android-custom-build steps: diff --git a/tools/ci_build/github/azure-pipelines/py-package-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-package-test-pipeline.yml index 55d3150f21aa3..04f555deb1a22 100644 --- a/tools/ci_build/github/azure-pipelines/py-package-test-pipeline.yml +++ 
b/tools/ci_build/github/azure-pipelines/py-package-test-pipeline.yml @@ -18,7 +18,7 @@ stages: - template: templates/py-packaging-linux-test-cpu.yml parameters: arch: 'x86_64' - machine_pool: 'onnxruntime-Ubuntu2004-AMD-CPU' + machine_pool: 'onnxruntime-Ubuntu2204-AMD-CPU' base_image: 'registry.access.redhat.com/ubi8/ubi' devtoolset_rootpath: /opt/rh/gcc-toolset-12/root ld_library_path_arg: /opt/rh/gcc-toolset-12/root/usr/lib64:/opt/rh/gcc-toolset-12/root/usr/lib:/opt/rh/gcc-toolset-12/root/usr/lib64/dyninst:/opt/rh/gcc-toolset-12/root/usr/lib/dyninst:/usr/local/lib64 diff --git a/tools/ci_build/github/azure-pipelines/stages/py-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-cuda-packaging-stage.yml index e6d8ee35e75e3..f82c80d4d7e93 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-cuda-packaging-stage.yml @@ -105,7 +105,7 @@ stages: - template: ../templates/py-linux-gpu.yml parameters: arch: 'x86_64' - machine_pool: 'onnxruntime-Ubuntu2004-AMD-CPU' + machine_pool: 'onnxruntime-Ubuntu2204-AMD-CPU' extra_build_arg: ${{ parameters.build_py_parameters }} cmake_build_type: ${{ parameters.cmake_build_type }} docker_base_image: ${{ variables.docker_base_image }} diff --git a/tools/ci_build/github/azure-pipelines/stages/py-cuda-publishing-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-cuda-publishing-stage.yml index 4f440e0f61b3d..2a4debcf9fba5 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-cuda-publishing-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-cuda-publishing-stage.yml @@ -20,7 +20,7 @@ stages: dependsOn: [] jobs: - job: - pool: 'onnxruntime-Ubuntu2004-AMD-CPU' + pool: 'onnxruntime-Ubuntu2204-AMD-CPU' steps: - checkout: none - task: DownloadPipelineArtifact@2 diff --git a/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml b/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml index 5e61f88b4aa18..509fea45ebe53 100644 --- a/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml +++ b/tools/ci_build/github/azure-pipelines/templates/android-java-api-aar.yml @@ -33,7 +33,7 @@ parameters: - name: pool_name displayName: Pool name type: string - default: 'onnxruntime-Ubuntu2004-AMD-CPU' + default: 'onnxruntime-Ubuntu2204-AMD-CPU' - name: packageName # now we can build onnxruntime or onnxruntime-mobile for Android, need specify it here diff --git a/tools/ci_build/github/azure-pipelines/templates/build-linux-wasm-step.yml b/tools/ci_build/github/azure-pipelines/templates/build-linux-wasm-step.yml index e664cf69dec76..e77b1a4008b7c 100644 --- a/tools/ci_build/github/azure-pipelines/templates/build-linux-wasm-step.yml +++ b/tools/ci_build/github/azure-pipelines/templates/build-linux-wasm-step.yml @@ -24,19 +24,17 @@ parameters: type: string steps: - - task: Cache@2 - inputs: - ${{if eq(variables['Build.SourceBranchName'], 'merge')}}: - key: ' "${{parameters.TODAY}}" | ${{parameters.AdditionalKey}} | merge ' - ${{else}}: - key: '"${{parameters.TODAY}}" | ${{parameters.AdditionalKey}} | $(Build.SourceVersion) ' - path: ${{parameters.CacheDir}} - restoreKeys: | - "${{parameters.TODAY}}" | ${{parameters.AdditionalKey}} - displayName: Cache Task - condition: eq('${{parameters.WithCache}}', true) - - ${{if eq(parameters.WithCache, true)}}: + - task: Cache@2 + inputs: + ${{if eq(variables['Build.SourceBranchName'], 'merge')}}: + key: ' "${{parameters.TODAY}}" | 
${{parameters.AdditionalKey}} | merge ' + ${{else}}: + key: '"${{parameters.TODAY}}" | ${{parameters.AdditionalKey}} | $(Build.SourceVersion) ' + path: ${{parameters.CacheDir}} + restoreKeys: | + "${{parameters.TODAY}}" | ${{parameters.AdditionalKey}} + displayName: Cache Task - script: | set -e -x pushd '$(Build.SourcesDirectory)/cmake/external/emsdk' diff --git a/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml b/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml index 81319e07c6b17..168602a17910b 100644 --- a/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml +++ b/tools/ci_build/github/azure-pipelines/templates/c-api-cpu.yml @@ -759,7 +759,7 @@ stages: - template: ../nuget/templates/test_linux.yml parameters: - AgentPool : onnxruntime-Ubuntu2004-AMD-CPU + AgentPool : onnxruntime-Ubuntu2204-AMD-CPU NugetPackageName : 'Microsoft.ML.OnnxRuntime' ArtifactSuffix: 'CPU' SpecificArtifact: ${{ parameters.SpecificArtifact }} @@ -796,7 +796,7 @@ stages: OS: Linux BuildId: ${{ parameters.BuildId }} SpecificArtifact: ${{ parameters.SpecificArtifact }} - PoolName: 'onnxruntime-Ubuntu2004-AMD-CPU' + PoolName: 'onnxruntime-Ubuntu2204-AMD-CPU' - template: final-jar-testing.yml parameters: diff --git a/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml b/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml index 8538f15e93753..cf470b3fa2448 100644 --- a/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml +++ b/tools/ci_build/github/azure-pipelines/templates/c-api-linux-cpu.yml @@ -19,7 +19,7 @@ parameters: - name: PoolName type: string - default: 'onnxruntime-Ubuntu2004-AMD-CPU' + default: 'onnxruntime-Ubuntu2204-AMD-CPU' - name: ArtifactNamePrefix type: string diff --git a/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml b/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml index 7b9788d90b17d..15165e3cb0950 100644 --- a/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml @@ -1,5 +1,5 @@ parameters: - AgentPool : 'onnxruntime-Ubuntu2004-AMD-CPU' + AgentPool : 'onnxruntime-Ubuntu2204-AMD-CPU' StageName : 'Linux_CI_Dev' RunDockerBuildArgs: '-o ubuntu20.04 -d cpu -x "--build_wheel"' NuPackScript: '' diff --git a/tools/ci_build/github/azure-pipelines/templates/linux-cpu-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/templates/linux-cpu-packaging-pipeline.yml index 6ad5f9f38a4db..8972d55f6e190 100644 --- a/tools/ci_build/github/azure-pipelines/templates/linux-cpu-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/templates/linux-cpu-packaging-pipeline.yml @@ -32,7 +32,7 @@ stages: BaseImage: 'registry.access.redhat.com/ubi8/ubi' OnnxruntimeArch: 'x64' OnnxruntimeNodejsBindingArch: 'x64' - PoolName: 'onnxruntime-Ubuntu2004-AMD-CPU' + PoolName: 'onnxruntime-Ubuntu2204-AMD-CPU' ArtifactNamePrefix: ${{ parameters.ArtifactNamePrefix }} PackageJava: ${{ parameters.PackageJava }} PackageNodeJS: ${{ parameters.PackageNodeJS }} diff --git a/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml b/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml index e6693a6f6d26a..d279e667f9091 100644 --- a/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/linux-wasm-ci.yml @@ -13,7 +13,7 @@ parameters: - name: PoolName type: string - default: 'onnxruntime-Ubuntu2004-AMD-CPU' + default: 'onnxruntime-Ubuntu2204-AMD-CPU' - name: 
SkipPublish type: boolean diff --git a/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml index 51583a25f63ac..cf39be23cbdaf 100644 --- a/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/templates/ondevice-training-cpu-packaging-pipeline.yml @@ -336,7 +336,7 @@ stages: - template: ../nuget/templates/test_linux.yml parameters: - AgentPool : onnxruntime-Ubuntu2004-AMD-CPU + AgentPool : onnxruntime-Ubuntu2204-AMD-CPU NugetPackageName : 'Microsoft.ML.OnnxRuntime.Training' ArtifactSuffix: 'Training-CPU' StageSuffix: 'Training_CPU' diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml index 00ba5ea4a475a..01cab936aa529 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml @@ -48,7 +48,7 @@ stages: timeoutInMinutes: 90 workspace: clean: all - pool: onnxruntime-Ubuntu2004-AMD-CPU + pool: onnxruntime-Ubuntu2204-AMD-CPU strategy: matrix: ${{ each PythonVersion in parameters.python_version }}: diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml index abe06e80f4f19..8669a883c31f1 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml @@ -430,7 +430,7 @@ stages: - template: py-linux.yml parameters: arch: 'x86_64' - machine_pool: 'onnxruntime-Ubuntu2004-AMD-CPU' + machine_pool: 'onnxruntime-Ubuntu2204-AMD-CPU' base_image: 'registry.access.redhat.com/ubi8/ubi' devtoolset_rootpath: /opt/rh/gcc-toolset-12/root ld_library_path_arg: /opt/rh/gcc-toolset-12/root/usr/lib64:/opt/rh/gcc-toolset-12/root/usr/lib:/opt/rh/gcc-toolset-12/root/usr/lib64/dyninst:/opt/rh/gcc-toolset-12/root/usr/lib/dyninst:/usr/local/lib64 @@ -443,6 +443,6 @@ stages: - template: py-linux-gpu.yml parameters: arch: 'x86_64' - machine_pool: 'onnxruntime-Ubuntu2004-AMD-CPU' + machine_pool: 'onnxruntime-Ubuntu2204-AMD-CPU' extra_build_arg: ${{ parameters.build_py_parameters }} cmake_build_type: ${{ parameters.cmake_build_type }} diff --git a/tools/ci_build/github/azure-pipelines/templates/rocm.yml b/tools/ci_build/github/azure-pipelines/templates/rocm.yml index 2e9e6c6b35a2e..43a80aa4fd4e3 100644 --- a/tools/ci_build/github/azure-pipelines/templates/rocm.yml +++ b/tools/ci_build/github/azure-pipelines/templates/rocm.yml @@ -14,7 +14,7 @@ jobs: workspace: clean: all timeoutInMinutes: 180 - pool: Ubuntu-2004-rocm-aiinfra + pool: Ubuntu-2204-rocm-aiinfra variables: - name: PythonVersion value: ${{ parameters.PythonVersion }} diff --git a/tools/ci_build/github/azure-pipelines/web-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/web-ci-pipeline.yml index e352a04068ee8..24809ccfdec1f 100644 --- a/tools/ci_build/github/azure-pipelines/web-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/web-ci-pipeline.yml @@ -53,7 +53,7 @@ stages: parameters: NpmPackagingMode: ${{ variables.NpmPackagingMode }} IsReleasePipeline: false - PoolName: 'onnxruntime-Ubuntu2004-AMD-CPU' + PoolName: 'onnxruntime-Ubuntu2204-AMD-CPU' BuildStaticLib: true ExtraBuildArgs: 
$(ExtraBuildArgs)
      WASMTemplate: linux-wasm-ci.yml
diff --git a/tools/ci_build/github/linux/build_linux_python_package.sh b/tools/ci_build/github/linux/build_linux_python_package.sh
index 1059dd5047477..933d1f3d5874a 100755
--- a/tools/ci_build/github/linux/build_linux_python_package.sh
+++ b/tools/ci_build/github/linux/build_linux_python_package.sh
@@ -7,9 +7,9 @@ mkdir -p /build/dist
 
 EXTRA_ARG=""
 
-# Put 3.8 at the last because Ubuntu 20.04 use python 3.8 and we will upload the intermediate build files of this
-# config to Azure DevOps Artifacts and download them to a Ubuntu 20.04 machine to run the tests.
-PYTHON_EXES=("/opt/python/cp39-cp39/bin/python3.9" "/opt/python/cp310-cp310/bin/python3.10" "/opt/python/cp311-cp311/bin/python3.11" "/opt/python/cp312-cp312/bin/python3.12" "/opt/python/cp38-cp38/bin/python3.8")
+# Put 3.10 at the last because Ubuntu 22.04 uses python 3.10 and we will upload the intermediate build files of this
+# config to Azure DevOps Artifacts and download them to an Ubuntu 22.04 machine to run the tests.
+PYTHON_EXES=("/opt/python/cp38-cp38/bin/python3.8" "/opt/python/cp39-cp39/bin/python3.9" "/opt/python/cp311-cp311/bin/python3.11" "/opt/python/cp312-cp312/bin/python3.12" "/opt/python/cp310-cp310/bin/python3.10")
 while getopts "d:p:x:c:" parameter_Option
 do case "${parameter_Option}" in
diff --git a/tools/ci_build/github/linux/run_python_tests.sh b/tools/ci_build/github/linux/run_python_tests.sh
index 3164a10a09dfd..082c561dd17b9 100755
--- a/tools/ci_build/github/linux/run_python_tests.sh
+++ b/tools/ci_build/github/linux/run_python_tests.sh
@@ -15,7 +15,7 @@ c) BUILD_CONFIG=${OPTARG};;
 esac
 done
 
-export PATH=/opt/python/cp38-cp38/bin:$PATH
+export PATH=/opt/python/cp310-cp310/bin:$PATH
 cd /build
 files=(whl/*.whl)
 FILE_NAME="${files[0]}"

From 07d3aed3aa3a054deb502cedf867f559fc690755 Mon Sep 17 00:00:00 2001
From: Wanming Lin
Date: Wed, 17 Jan 2024 13:35:13 +0800
Subject: [PATCH 08/15] [WebNN EP] Fixed build issue with disable_rtti (#19173)

Previously, building the WebNN EP with --disable_rtti threw an
unboundTypeError, since unbound type names are illegal with RTTI
disabled in the Embind API. We can fix this by adding a
-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0 flag.
---
 cmake/adjust_global_compile_flags.cmake |  5 +++++
 cmake/onnxruntime_webassembly.cmake     |  5 ++++-
 tools/ci_build/build.py                 |  4 ----
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/cmake/adjust_global_compile_flags.cmake b/cmake/adjust_global_compile_flags.cmake
index 30d8cbf78fb1a..2c7bf9f1c2f5c 100644
--- a/cmake/adjust_global_compile_flags.cmake
+++ b/cmake/adjust_global_compile_flags.cmake
@@ -123,6 +123,11 @@ if (onnxruntime_DISABLE_RTTI)
     add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/GR->" "$<$<COMPILE_LANGUAGE:CXX>:/we4541>")
   else()
     add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>")
+    if (onnxruntime_USE_WEBNN)
+      # Avoid unboundTypeError for WebNN EP since unbound type names are illegal with RTTI disabled
+      # in Embind API, relevant issue: https://github.com/emscripten-core/emscripten/issues/7001
+      add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:-DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0>")
+    endif()
   endif()
 else()
   #MSVC RTTI flag /GR is not added to CMAKE_CXX_FLAGS by default. But, anyway VC++2019 treats "/GR" default on.
diff --git a/cmake/onnxruntime_webassembly.cmake b/cmake/onnxruntime_webassembly.cmake index 858583e64e9df..546d50c1ca2d3 100644 --- a/cmake/onnxruntime_webassembly.cmake +++ b/cmake/onnxruntime_webassembly.cmake @@ -268,7 +268,10 @@ else() endif() if (onnxruntime_USE_WEBNN) - set_property(TARGET onnxruntime_webassembly APPEND_STRING PROPERTY LINK_FLAGS " --bind -sWASM_BIGINT") + set_property(TARGET onnxruntime_webassembly APPEND_STRING PROPERTY LINK_FLAGS " --bind -sWASM_BIGINT") + if (onnxruntime_DISABLE_RTTI) + set_property(TARGET onnxruntime_webassembly APPEND_STRING PROPERTY LINK_FLAGS " -fno-rtti -DEMSCRIPTEN_HAS_UNBOUND_TYPE_NAMES=0") + endif() endif() # Set link flag to enable exceptions support, this will override default disabling exception throwing behavior when disable exceptions. diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 1a6262edf45c9..1034a82cb2854 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1283,10 +1283,6 @@ def generate_build_tree( if args.use_webnn: if not args.build_wasm: raise BuildError("WebNN is only available for WebAssembly build.") - if args.disable_rtti: - # Avoid unboundTypeError for WebNN EP since unbound type names are illegal with RTTI disabled - # in Embind API, relevant issue: https://github.com/emscripten-core/emscripten/issues/16911 - raise BuildError("WebNN is not supported with RTTI disabled.") cmake_args += ["-Donnxruntime_USE_WEBNN=ON"] if args.use_snpe: From 9876cc7c4f5f6249e1dec8b93abf7b8dfcf5ca0c Mon Sep 17 00:00:00 2001 From: wejoncy Date: Wed, 17 Jan 2024 15:46:19 +0800 Subject: [PATCH 09/15] more inputs support for LLM exporter (#19005) ### Description ### Motivation and Context --- .../transformers/large_model_exporter.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/onnxruntime/python/tools/transformers/large_model_exporter.py b/onnxruntime/python/tools/transformers/large_model_exporter.py index 1601b1a203b9a..9e8b284bf56c7 100644 --- a/onnxruntime/python/tools/transformers/large_model_exporter.py +++ b/onnxruntime/python/tools/transformers/large_model_exporter.py @@ -224,24 +224,35 @@ def fetch_onnx_inputs_outputs_name( if not num_of_past_key: num_of_past_key = model.config.num_hidden_layers - onnx_inp_names = ("input_ids", "attention_mask") + # filter out constant inputs + onnx_inp_names = tuple( + [torch_input_names[i] for i in range(len(torch_input_names)) if isinstance(onnx_inputs[i], torch.Tensor)] + ) + assert ( + "input_ids" in onnx_inp_names and "attention_mask" in onnx_inp_names + ), "input_ids and attention_mask must be existed in inputs" onnx_out_names = ("logits",) onnx_dynamic_axes = { "input_ids": {0: "batch_size", 1: "seq_len"}, "attention_mask": {0: "batch_size", 1: "seq_len"}, } + # add dyanmic dimensions for the unkonw inputs + for idx, name in enumerate(onnx_inp_names): + if name not in onnx_dynamic_axes: + unknown_dims = {i: f"{idx}__unknown_dims__{i}" for i in range(onnx_inputs[idx].dim())} + onnx_dynamic_axes[name] = unknown_dims if input_with_past: for i in range(num_of_past_key): - onnx_inp_names += (f"present_key.{i}",) - onnx_inp_names += (f"present_values.{i}",) + onnx_inp_names += (f"past_key_values.{i}.key",) + onnx_inp_names += (f"past_key_values.{i}.value",) onnx_dynamic_axes[onnx_inp_names[-1]] = kv_cache_axis onnx_dynamic_axes[onnx_inp_names[-2]] = kv_cache_axis if with_past or input_with_past: for i in range(num_of_past_key): - onnx_out_names += (f"past_key.{i}",) - onnx_out_names += (f"past_values.{i}",) + 
onnx_out_names += (f"present.{i}.key",) + onnx_out_names += (f"present.{i}.value",) onnx_dynamic_axes[onnx_out_names[-1]] = kv_cache_axis onnx_dynamic_axes[onnx_out_names[-2]] = kv_cache_axis From 63dd605d3310f5a9540c414216f3f3b67d455c4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Wed, 17 Jan 2024 19:00:36 +0100 Subject: [PATCH 10/15] Fix untyped float values in quantization tool missing from PR #18043 (#19182) ### Description Extends the code coverage to the Entropy, Histogram and Distribution calibration methods, fixing bugs while doing it. ### Motivation and Context Bugs detected in [Olive](https://github.com/microsoft/OLive). --- .../python/tools/quantization/calibrate.py | 86 +++++++++++++++---- .../python/tools/quantization/quant_utils.py | 2 +- .../python/quantization/test_op_matmul.py | 66 +++++++++++++- 3 files changed, 131 insertions(+), 23 deletions(-) diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py index d0db57c392961..77b3dce9fb004 100644 --- a/onnxruntime/python/tools/quantization/calibrate.py +++ b/onnxruntime/python/tools/quantization/calibrate.py @@ -5,6 +5,7 @@ # license information. # -------------------------------------------------------------------------- import abc +import copy import itertools import os import uuid @@ -21,6 +22,48 @@ from .quant_utils import apply_plot, load_model_with_shape_infer, smooth_distribution +def rel_entr(pk: np.ndarray, qk: np.ndarray) -> np.ndarray: + """ + See https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.rel_entr.html#scipy.special.rel_entr. + Python implementation. + """ + res = np.empty(pk.shape, dtype=pk.dtype) + res[:] = pk[:] * np.log(pk[:] / qk[:]) + c2 = (pk == 0) & (qk >= 0) + res[c2] = 0 + c1 = (pk > 0) & (qk > 0) + res[~c1] = np.inf + return res + + +def entropy( + pk: np.ndarray, + qk: np.ndarray, + base: Optional[float] = None, + axis: int = 0, +) -> np.ndarray: + """ + Simplified version of entropy. + Source: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.entropy.html. + This avoids taking a dependency on scipy just for this function. + """ + assert base is None or base > 0, f"base={base} must be a positive number or `None`." 
+ assert qk is not None, "qk is None" + + pk = np.asarray(pk).astype(np.float32) + pk = 1.0 * pk / np.sum(pk, axis=axis, keepdims=True) + + qk = np.asarray(qk).astype(np.float32) + pk, qk = np.broadcast_arrays(pk, qk) + qk = 1.0 * qk / np.sum(qk, axis=axis, keepdims=True) + vec = rel_entr(pk, qk) + + s = np.sum(vec, axis=axis) + if base is not None: + s /= np.log(base) + return s.astype(pk.dtype) + + class TensorData: _allowed = frozenset(["avg", "std", "lowest", "highest", "hist", "hist_edges", "bins"]) _floats = frozenset(["avg", "std", "lowest", "highest", "hist_edges"]) @@ -708,8 +751,8 @@ def collect_absolute_value(self, name_to_arr): min_value = np.min(data_arr_np) max_value = np.max(data_arr_np) else: - min_value = 0 - max_value = 0 + min_value = np.array(0, dtype=data_arr_np.dtype) + max_value = np.array(0, dtype=data_arr_np.dtype) data_arr_np = np.absolute(data_arr_np) # only consider absolute value @@ -725,6 +768,8 @@ def collect_absolute_value(self, name_to_arr): old_histogram = self.histogram_dict[tensor] old_min = old_histogram[2] old_max = old_histogram[3] + assert hasattr(old_min, "dtype"), f"old_min should be a numpy array but is {type(old_min)}" + assert hasattr(old_max, "dtype"), f"old_max should be a numpy array but is {type(old_max)}" old_hist = old_histogram[0] old_hist_edges = old_histogram[1] temp_amax = np.max(data_arr_np) @@ -757,7 +802,7 @@ def collect_value(self, name_to_arr): min_value = np.array(0, dtype=data_arr.dtype) max_value = np.array(0, dtype=data_arr.dtype) - threshold = max(abs(min_value), abs(max_value)) + threshold = np.array(max(abs(min_value), abs(max_value)), dtype=data_arr.dtype) if tensor in self.histogram_dict: old_histogram = self.histogram_dict[tensor] @@ -809,7 +854,7 @@ def merge_histogram(self, old_histogram, data_arr, new_min, new_max, new_thresho def compute_collection_result(self): if not self.histogram_dict or len(self.histogram_dict) == 0: raise ValueError("Histogram has not been collected. Please run collect() first.") - print(f"Finding optimal threshold for each tensor using {self.method} algorithm ...") + print(f"Finding optimal threshold for each tensor using {self.method!r} algorithm ...") if self.method == "entropy": return self.compute_entropy() @@ -938,7 +983,14 @@ def compute_distribution(self): assert avg_coef.dtype != np.float64 assert std_coef.dtype != np.float64 assert hist_edges.dtype != np.float64 - thresholds_dict[tensor] = TensorData(avg=avg_coef, std=std_coef, hist=hist, hist_edges=hist_edges) + thresholds_dict[tensor] = TensorData( + avg=avg_coef, + std=std_coef, + hist=hist, + hist_edges=hist_edges, + lowest=hist_edges.min(), + highest=hist_edges.max(), + ) # Plot histogram for debug only if os.environ.get("QUANTIZATION_DEBUG", 0) in (1, "1"): @@ -952,18 +1004,15 @@ def get_entropy_threshold(self, histogram, num_quantized_bins): `q` is a truncated version of the original distribution. 
Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf """ - import copy - - from scipy.stats import entropy - hist = histogram[0] hist_edges = histogram[1] num_bins = hist.size zero_bin_index = num_bins // 2 num_half_quantized_bin = num_quantized_bins // 2 + dtype = histogram[1].dtype kl_divergence = np.zeros(zero_bin_index - num_half_quantized_bin + 1) - thresholds = [(0, 0) for i in range(kl_divergence.size)] + thresholds = [(np.array(0, dtype=dtype), np.array(0, dtype=dtype)) for i in range(kl_divergence.size)] # <------------ num bins ----------------> # <--- quantized bins ----> @@ -983,10 +1032,7 @@ def get_entropy_threshold(self, histogram, num_quantized_bins): start_index = zero_bin_index - i end_index = zero_bin_index + i + 1 if (zero_bin_index + i + 1) <= num_bins else num_bins - thresholds[i - num_half_quantized_bin] = ( - float(hist_edges[start_index]), - float(hist_edges[end_index]), - ) + thresholds[i - num_half_quantized_bin] = (hist_edges[start_index], hist_edges[end_index]) sliced_distribution = copy.deepcopy(hist[start_index:end_index]) @@ -1020,15 +1066,15 @@ def get_entropy_threshold(self, histogram, num_quantized_bins): norm = sum(nonzeros[start:end]) if norm != 0: - q[start:end] = float(quantized_bins[index]) / float(norm) + q[start:end] = quantized_bins[index] / norm p = smooth_distribution(p) q = smooth_distribution(q) - - if isinstance(q, np.ndarray): - kl_divergence[i - num_half_quantized_bin] = entropy(p, q) + if p is None or q is None: + div = np.array(np.inf, dtype=dtype) else: - kl_divergence[i - num_half_quantized_bin] = float("inf") + div = np.array(entropy(p, q), dtype=dtype) + kl_divergence[i - num_half_quantized_bin] = div min_kl_divergence_idx = np.argmin(kl_divergence) optimal_threshold = thresholds[min_kl_divergence_idx] @@ -1038,6 +1084,8 @@ def get_entropy_threshold(self, histogram, num_quantized_bins): optimal_threshold = (min_value, optimal_threshold[1]) if optimal_threshold[1] > max_value: optimal_threshold = (optimal_threshold[0], max_value) + assert hasattr(optimal_threshold[0], "dtype") + assert hasattr(optimal_threshold[1], "dtype") return optimal_threshold diff --git a/onnxruntime/python/tools/quantization/quant_utils.py b/onnxruntime/python/tools/quantization/quant_utils.py index 68c2b3bf79c8b..036f49b420734 100644 --- a/onnxruntime/python/tools/quantization/quant_utils.py +++ b/onnxruntime/python/tools/quantization/quant_utils.py @@ -653,7 +653,7 @@ def smooth_distribution(p, eps=0.0001): if not n_nonzeros: # raise ValueError('The discrete probability distribution is malformed. 
All entries are 0.') - return -1 + return None eps1 = eps * float(n_zeros) / float(n_nonzeros) assert eps1 < 1.0, "n_zeros=%d, n_nonzeros=%d, eps1=%f" % ( n_zeros, diff --git a/onnxruntime/test/python/quantization/test_op_matmul.py b/onnxruntime/test/python/quantization/test_op_matmul.py index 344583aa7c624..91368bd643158 100644 --- a/onnxruntime/test/python/quantization/test_op_matmul.py +++ b/onnxruntime/test/python/quantization/test_op_matmul.py @@ -10,13 +10,39 @@ import numpy as np import onnx import packaging.version as pv +from numpy.testing import assert_almost_equal from onnx import TensorProto, helper from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type +from onnxruntime.capi.onnxruntime_pybind11_state import Fail from onnxruntime.quantization import CalibrationMethod, QuantFormat, QuantType, quantize_dynamic, quantize_static +from onnxruntime.quantization.calibrate import entropy + + +def skip_if_new_opset_exception_raised(func): + def wrapper(*args, **kwargs): + try: + func(*args, **kwargs) + except Fail as e: + if "is under development and support for this is limited" in str(e): + raise unittest.SkipTest(f"Skipped {func} due to opset under development.") # noqa: B904 + raise + + return wrapper class TestOpMatMul(unittest.TestCase): + def test_entropy(self): + try: + from scipy.stats import entropy as scipy_entropy + except ImportError: + raise unittest.SkipTest("scipy not installed.") # noqa: B904 + pk = (np.arange(10) - 5).astype(np.float32) / 10 + qk = -(np.arange(10) - 5).astype(np.float32) / 10 + ent = scipy_entropy(pk, qk) + get = entropy(pk, qk) + assert_almost_equal(ent, get) + def input_feeds(self, n, name2shape, dtype): input_data_list = [] for _i in range(n): @@ -324,10 +350,11 @@ def test_quantize_matmul_u8u8(self): @unittest.skipIf( pv.Version(onnx.__version__) < pv.Version("1.15.1"), reason="Shape inference bug, see onnx PR #5709" ) + @skip_if_new_opset_exception_raised def test_quantize_matmul_u8u8_f16(self): - self.quantize_matmul_u8u8(onnx.TensorProto.FLOAT16, 19, 9) + self.quantize_matmul_u8u8(onnx.TensorProto.FLOAT16, 21, 9) - def quantize_matmul_s8s8(self, tt, opset, ir_version): + def quantize_matmul_s8s8(self, tt, opset, ir_version, calibrate_method=CalibrationMethod.MinMax): np.random.seed(1) model_fp_path = "matmul_fp.onnx" self.construct_model_matmul(model_fp_path, tensor_type=tt, opset=opset, ir_version=ir_version) @@ -341,6 +368,7 @@ def quantize_matmul_s8s8(self, tt, opset, ir_version): activation_type=QuantType.QInt8, weight_type=QuantType.QInt8, extra_options={"ActivationSymmetric": True}, + calibrate_method=calibrate_method, ) self.static_quant_test_qdq( model_fp_path, @@ -348,6 +376,7 @@ def quantize_matmul_s8s8(self, tt, opset, ir_version): activation_type=QuantType.QInt8, weight_type=QuantType.QInt8, extra_options={"ActivationSymmetric": True}, + calibrate_method=calibrate_method, ) # dynamic quantization doesn't support activation:int8 @@ -357,11 +386,42 @@ def quantize_matmul_s8s8(self, tt, opset, ir_version): def test_quantize_matmul_s8s8(self): self.quantize_matmul_s8s8(onnx.TensorProto.FLOAT, 18, 8) + def test_quantize_matmul_s8s8_entropy(self): + self.quantize_matmul_s8s8(onnx.TensorProto.FLOAT, 18, 8, calibrate_method=CalibrationMethod.Entropy) + + def test_quantize_matmul_s8s8_percentile(self): + self.quantize_matmul_s8s8(onnx.TensorProto.FLOAT, 18, 8, calibrate_method=CalibrationMethod.Percentile) + + def test_quantize_matmul_s8s8_distribution(self): + 
self.quantize_matmul_s8s8(onnx.TensorProto.FLOAT, 18, 8, calibrate_method=CalibrationMethod.Distribution) + @unittest.skipIf( pv.Version(onnx.__version__) < pv.Version("1.15.1"), reason="Shape inference bug, see onnx PR #5709" ) + @skip_if_new_opset_exception_raised def test_quantize_matmul_s8s8_f16(self): - self.quantize_matmul_s8s8(onnx.TensorProto.FLOAT16, 19, 9) + self.quantize_matmul_s8s8(onnx.TensorProto.FLOAT16, 21, 9) + + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.15.1"), reason="Shape inference bug, see onnx PR #5709" + ) + @skip_if_new_opset_exception_raised + def test_quantize_matmul_s8s8_f16_entropy(self): + self.quantize_matmul_s8s8(onnx.TensorProto.FLOAT16, 21, 9, calibrate_method=CalibrationMethod.Entropy) + + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.15.1"), reason="Shape inference bug, see onnx PR #5709" + ) + @skip_if_new_opset_exception_raised + def test_quantize_matmul_s8s8_f16_percentile(self): + self.quantize_matmul_s8s8(onnx.TensorProto.FLOAT16, 21, 9, calibrate_method=CalibrationMethod.Percentile) + + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.15.1"), reason="Shape inference bug, see onnx PR #5709" + ) + @skip_if_new_opset_exception_raised + def test_quantize_matmul_s8s8_f16_distribution(self): + self.quantize_matmul_s8s8(onnx.TensorProto.FLOAT16, 21, 9, calibrate_method=CalibrationMethod.Distribution) def quantize_matmul_e4m3fn_same(self, tt, opset, ir_version): np.random.seed(1) From bd9d8fb2a545a59d87a4c23308ec543ba6e4c41d Mon Sep 17 00:00:00 2001 From: Rachel Guo <35738743+YUNQIUGUO@users.noreply.github.com> Date: Wed, 17 Jan 2024 11:18:32 -0800 Subject: [PATCH 11/15] [ORT 1.17.0 release] Bump up version to 1.18.0 (#19170) ### Description Bump up version to 1.18.0 since the release branch has been cut. 
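A quick sanity check against a wheel built from this branch — a sketch only, since development builds may carry a suffix (e.g. a hypothetical "1.18.0.dev20240117") rather than exactly 1.18.0:

```python
import onnxruntime as ort

# Sketch: after this bump, a wheel built from main should report 1.18.x.
# Only the major.minor prefix is checked, to allow dev/rc suffixes.
assert ort.__version__.startswith("1.18"), f"unexpected version: {ort.__version__}"
```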
### Motivation and Context Co-authored-by: rachguo --- VERSION_NUMBER | 2 +- .../Training/NativeTrainingMethods.shared.cs | 4 ++-- docs/python/README.rst | 5 +++++ include/onnxruntime/core/session/onnxruntime_c_api.h | 2 +- js/common/lib/version.ts | 2 +- js/common/package-lock.json | 4 ++-- js/common/package.json | 2 +- js/node/lib/version.ts | 2 +- js/node/package-lock.json | 6 +++--- js/node/package.json | 2 +- js/react_native/lib/version.ts | 2 +- js/react_native/package.json | 2 +- js/react_native/yarn.lock | 2 +- js/web/lib/version.ts | 2 +- js/web/package-lock.json | 6 +++--- js/web/package.json | 2 +- onnxruntime/__init__.py | 2 +- onnxruntime/core/session/onnxruntime_c_api.cc | 8 ++++---- 18 files changed, 31 insertions(+), 26 deletions(-) diff --git a/VERSION_NUMBER b/VERSION_NUMBER index 092afa15df4df..84cc529467b05 100644 --- a/VERSION_NUMBER +++ b/VERSION_NUMBER @@ -1 +1 @@ -1.17.0 +1.18.0 diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Training/NativeTrainingMethods.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/Training/NativeTrainingMethods.shared.cs index 68a399f8b9671..7fe16f4156ef2 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Training/NativeTrainingMethods.shared.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Training/NativeTrainingMethods.shared.cs @@ -65,10 +65,10 @@ static NativeTrainingMethods() DOrtGetApi OrtGetApi = (DOrtGetApi)Marshal.GetDelegateForFunctionPointer(NativeMethods.OrtGetApiBase().GetApi, typeof(DOrtGetApi)); // TODO: Make this save the pointer, and not copy the whole structure across - api_ = (OrtApi)OrtGetApi(17 /*ORT_API_VERSION*/); + api_ = (OrtApi)OrtGetApi(18 /*ORT_API_VERSION*/); OrtGetTrainingApi = (DOrtGetTrainingApi)Marshal.GetDelegateForFunctionPointer(api_.GetTrainingApi, typeof(DOrtGetTrainingApi)); - trainingApiPtr = OrtGetTrainingApi(17 /*ORT_API_VERSION*/); + trainingApiPtr = OrtGetTrainingApi(18 /*ORT_API_VERSION*/); if (trainingApiPtr != IntPtr.Zero) { trainingApi_ = (OrtTrainingApi)Marshal.PtrToStructure(trainingApiPtr, typeof(OrtTrainingApi)); diff --git a/docs/python/README.rst b/docs/python/README.rst index 32bb3729e01d0..bbc8571fe3f17 100644 --- a/docs/python/README.rst +++ b/docs/python/README.rst @@ -8,6 +8,11 @@ For more information on ONNX Runtime, please see `aka.ms/onnxruntime `_ or the `Github project `_. """ -__version__ = "1.17.0" +__version__ = "1.18.0" __author__ = "Microsoft" # we need to do device version validation (for example to check Cuda version for an onnxruntime-training package). diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index d77c188f832a7..91a7f0d930b51 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -2397,7 +2397,7 @@ Second example, if we wanted to add and remove some members, we'd do this: In GetApi we now make it return ort_api_3 for version 3. */ -static constexpr OrtApi ort_api_1_to_17 = { +static constexpr OrtApi ort_api_1_to_18 = { // NOTE: The ordering of these fields MUST not change after that version has shipped since existing binaries depend on this ordering. 
// Shipped as version 1 - DO NOT MODIFY (see above text for more information) @@ -2756,16 +2756,16 @@ static_assert(offsetof(OrtApi, KernelContext_GetResource) / sizeof(void*) == 265 static_assert(offsetof(OrtApi, SetUserLoggingFunction) / sizeof(void*) == 266, "Size of version 17 API cannot change"); // So that nobody forgets to finish an API version, this check will serve as a reminder: -static_assert(std::string_view(ORT_VERSION) == "1.17.0", +static_assert(std::string_view(ORT_VERSION) == "1.18.0", "ORT_Version change detected, please follow below steps to ensure OrtApi is updated properly"); // 1. Update the hardcoded version string in above static_assert to silence it -// 2. If there were any APIs added to ort_api_1_to_17 above: +// 2. If there were any APIs added to ort_api_1_to_18 above: // a. Add the 'End of version #' markers (pattern above should be obvious) // b. Add a static_assert in the directly above list of version sizes to ensure nobody adds any more functions to the just shipped API version ORT_API(const OrtApi*, OrtApis::GetApi, uint32_t version) { if (version >= 1 && version <= ORT_API_VERSION) - return &ort_api_1_to_17; + return &ort_api_1_to_18; fprintf(stderr, "The requested API version [%u] is not available, only API versions [1, %u] are supported in this build." From bc219ed553fc8d4b8fa3c7b4476810a63a864d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20M=C3=BCller?= <44298237+gedoensmax@users.noreply.github.com> Date: Wed, 17 Jan 2024 20:33:34 +0100 Subject: [PATCH 12/15] [TensorRT EP] Enable a minimal CUDA EP compilation without kernels (#19052) Addresses https://github.com/microsoft/onnxruntime/issues/18542. I followed the advice given by @RyanUnderhill [here](https://github.com/microsoft/onnxruntime/pull/18731#issuecomment-1848261925) and went with a minimal CUDA EP for now. --- cmake/CMakeLists.txt | 1 + cmake/onnxruntime_providers_cuda.cmake | 49 ++++++++++++++----- .../core/providers/cuda/cuda_context.h | 3 +- onnxruntime/core/providers/cuda/cuda_call.cc | 4 ++ .../core/providers/cuda/cuda_common.cc | 42 ++++++++-------- onnxruntime/core/providers/cuda/cuda_common.h | 6 ++- .../providers/cuda/cuda_execution_provider.cc | 14 +++++- onnxruntime/core/providers/cuda/cuda_pch.h | 7 +++ .../core/providers/cuda/cuda_stream_handle.cc | 4 ++ .../core/providers/cuda/cudnn_common.cc | 3 +- .../core/providers/cuda/cudnn_common.h | 3 +- 11 files changed, 97 insertions(+), 39 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index bc96218dac79e..712d5d76108aa 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -79,6 +79,7 @@ option(onnxruntime_USE_CUDA "Build with CUDA support" OFF) cmake_dependent_option(onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS "Build with CUDA unit tests" OFF "onnxruntime_USE_CUDA;onnxruntime_BUILD_UNIT_TESTS;LINUX" OFF) option(onnxruntime_USE_CUDA_NHWC_OPS "Build CUDA with NHWC op support" OFF) +option(onnxruntime_CUDA_MINIMAL "Build CUDA without any operations apart from memcpy ops. Useful for a very minimal TRT build" OFF) option(onnxruntime_ENABLE_CUDA_LINE_NUMBER_INFO "When building with CUDA support, generate device code line number information." 
OFF) option(onnxruntime_USE_OPENVINO "Build with OpenVINO support" OFF) option(onnxruntime_USE_COREML "Build with CoreML support" OFF) diff --git a/cmake/onnxruntime_providers_cuda.cmake b/cmake/onnxruntime_providers_cuda.cmake index 84d1376f99d5e..9887d615c92d7 100644 --- a/cmake/onnxruntime_providers_cuda.cmake +++ b/cmake/onnxruntime_providers_cuda.cmake @@ -1,10 +1,25 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. - file(GLOB_RECURSE onnxruntime_providers_cuda_cc_srcs CONFIGURE_DEPENDS - "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.h" - "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cc" - ) + + if (onnxruntime_CUDA_MINIMAL) + file(GLOB onnxruntime_providers_cuda_cc_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.h" + "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cc" + "${ONNXRUNTIME_ROOT}/core/providers/cuda/tunable/*.h" + "${ONNXRUNTIME_ROOT}/core/providers/cuda/tunable/*.cc" + ) + # Remove sources not needed for the minimal build + list(REMOVE_ITEM onnxruntime_providers_cuda_cc_srcs + "${ONNXRUNTIME_ROOT}/core/providers/cuda/integer_gemm.cc" + "${ONNXRUNTIME_ROOT}/core/providers/cuda/triton_kernel.h" + ) + else() + file(GLOB_RECURSE onnxruntime_providers_cuda_cc_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.h" + "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cc" + ) + endif() # Remove pch files list(REMOVE_ITEM onnxruntime_providers_cuda_cc_srcs "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.h" @@ -16,11 +31,16 @@ "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h" "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc" ) - file(GLOB_RECURSE onnxruntime_providers_cuda_cu_srcs CONFIGURE_DEPENDS - "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cu" - "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cuh" - ) + + if (onnxruntime_CUDA_MINIMAL) + set(onnxruntime_providers_cuda_cu_srcs "") + else() + file(GLOB_RECURSE onnxruntime_providers_cuda_cu_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cu" + "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cuh" + ) + endif() source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_cuda_cc_srcs} ${onnxruntime_providers_cuda_shared_srcs} ${onnxruntime_providers_cuda_cu_srcs}) set(onnxruntime_providers_cuda_src ${onnxruntime_providers_cuda_cc_srcs} ${onnxruntime_providers_cuda_shared_srcs} ${onnxruntime_providers_cuda_cu_srcs}) @@ -156,10 +176,15 @@ endif() add_dependencies(${target} onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES}) - target_link_libraries(${target} PRIVATE cublasLt cublas cudnn curand cufft ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface) - if(onnxruntime_CUDNN_HOME) - target_include_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/include) - target_link_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/lib) + if(onnxruntime_CUDA_MINIMAL) + target_compile_definitions(${target} PRIVATE USE_CUDA_MINIMAL) + target_link_libraries(${target} PRIVATE ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface) + else() + target_link_libraries(${target} PRIVATE cublasLt cublas cudnn curand cufft ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface) + if(onnxruntime_CUDNN_HOME) + target_include_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/include) + target_link_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/lib) + endif() endif() if (onnxruntime_USE_TRITON_KERNEL) diff --git a/include/onnxruntime/core/providers/cuda/cuda_context.h 
b/include/onnxruntime/core/providers/cuda/cuda_context.h index 9416fad5f1448..1370f5c4c5e10 100644 --- a/include/onnxruntime/core/providers/cuda/cuda_context.h +++ b/include/onnxruntime/core/providers/cuda/cuda_context.h @@ -16,9 +16,10 @@ #include "core/providers/custom_op_context.h" #include #include +#ifndef USE_CUDA_MINIMAL #include #include - +#endif namespace Ort { namespace Custom { diff --git a/onnxruntime/core/providers/cuda/cuda_call.cc b/onnxruntime/core/providers/cuda/cuda_call.cc index 4f223041e04e3..f60684795a4bc 100644 --- a/onnxruntime/core/providers/cuda/cuda_call.cc +++ b/onnxruntime/core/providers/cuda/cuda_call.cc @@ -30,6 +30,7 @@ const char* CudaErrString(cudaError_t x) { return cudaGetErrorString(x); } +#ifndef USE_CUDA_MINIMAL template <> const char* CudaErrString(cublasStatus_t e) { cudaDeviceSynchronize(); @@ -76,6 +77,7 @@ const char* CudaErrString(cufftResult e) { return "Unknown cufft error status"; } } +#endif #ifdef ORT_USE_NCCL template <> @@ -132,6 +134,7 @@ std::conditional_t CudaCall( template Status CudaCall(cudaError retCode, const char* exprString, const char* libName, cudaError successCode, const char* msg, const char* file, const int line); template void CudaCall(cudaError retCode, const char* exprString, const char* libName, cudaError successCode, const char* msg, const char* file, const int line); +#ifndef USE_CUDA_MINIMAL template Status CudaCall(cublasStatus_t retCode, const char* exprString, const char* libName, cublasStatus_t successCode, const char* msg, const char* file, const int line); template void CudaCall(cublasStatus_t retCode, const char* exprString, const char* libName, cublasStatus_t successCode, const char* msg, const char* file, const int line); template Status CudaCall(cudnnStatus_t retCode, const char* exprString, const char* libName, cudnnStatus_t successCode, const char* msg, const char* file, const int line); @@ -140,6 +143,7 @@ template Status CudaCall(curandStatus_t retCode, const ch template void CudaCall(curandStatus_t retCode, const char* exprString, const char* libName, curandStatus_t successCode, const char* msg, const char* file, const int line); template Status CudaCall(cufftResult retCode, const char* exprString, const char* libName, cufftResult successCode, const char* msg, const char* file, const int line); template void CudaCall(cufftResult retCode, const char* exprString, const char* libName, cufftResult successCode, const char* msg, const char* file, const int line); +#endif #ifdef ORT_USE_NCCL template Status CudaCall(ncclResult_t retCode, const char* exprString, const char* libName, ncclResult_t successCode, const char* msg, const char* file, const int line); diff --git a/onnxruntime/core/providers/cuda/cuda_common.cc b/onnxruntime/core/providers/cuda/cuda_common.cc index 33f2938940e4d..65083f89f7f77 100644 --- a/onnxruntime/core/providers/cuda/cuda_common.cc +++ b/onnxruntime/core/providers/cuda/cuda_common.cc @@ -14,6 +14,27 @@ namespace cuda { // 0x04 - pedantic constexpr const char* kCudaGemmOptions = "ORT_CUDA_GEMM_OPTIONS"; +const char* CudaDataTypeToString(cudaDataType_t dt) { + switch (dt) { + case CUDA_R_16F: + return "CUDA_R_16F"; + case CUDA_R_16BF: + return "CUDA_R_16BF"; + case CUDA_R_32F: + return "CUDA_R_32F"; +#if !defined(DISABLE_FLOAT8_TYPES) + // Note: CUDA_R_8F_E4M3 is defined with CUDA>=11.8 + case CUDA_R_8F_E4M3: + return "CUDA_R_8F_E4M3"; + case CUDA_R_8F_E5M2: + return "CUDA_R_8F_E5M2"; +#endif + default: + return ""; + } +} + +#ifndef USE_CUDA_MINIMAL // Initialize the singleton 
instance HalfGemmOptions HalfGemmOptions::instance; @@ -54,26 +75,6 @@ const char* cublasGetErrorEnum(cublasStatus_t error) { } } -const char* CudaDataTypeToString(cudaDataType_t dt) { - switch (dt) { - case CUDA_R_16F: - return "CUDA_R_16F"; - case CUDA_R_16BF: - return "CUDA_R_16BF"; - case CUDA_R_32F: - return "CUDA_R_32F"; -#if !defined(DISABLE_FLOAT8_TYPES) - // Note: CUDA_R_8F_E4M3 is defined with CUDA>=11.8 - case CUDA_R_8F_E4M3: - return "CUDA_R_8F_E4M3"; - case CUDA_R_8F_E5M2: - return "CUDA_R_8F_E5M2"; -#endif - default: - return ""; - } -} - const char* CublasComputeTypeToString(cublasComputeType_t ct) { switch (ct) { case CUBLAS_COMPUTE_16F: @@ -92,6 +93,7 @@ const char* CublasComputeTypeToString(cublasComputeType_t ct) { return ""; } } +#endif // It must exist somewhere already. cudaDataType_t ToCudaDataType(int32_t element_type) { diff --git a/onnxruntime/core/providers/cuda/cuda_common.h b/onnxruntime/core/providers/cuda/cuda_common.h index 707099bac3ce0..e9941ce743bc3 100644 --- a/onnxruntime/core/providers/cuda/cuda_common.h +++ b/onnxruntime/core/providers/cuda/cuda_common.h @@ -22,13 +22,14 @@ namespace onnxruntime { namespace cuda { #define CUDA_RETURN_IF_ERROR(expr) ORT_RETURN_IF_ERROR(CUDA_CALL(expr)) +#ifndef USE_CUDA_MINIMAL #define CUBLAS_RETURN_IF_ERROR(expr) ORT_RETURN_IF_ERROR(CUBLAS_CALL(expr)) #define CUSPARSE_RETURN_IF_ERROR(expr) ORT_RETURN_IF_ERROR(CUSPARSE_CALL(expr)) #define CURAND_RETURN_IF_ERROR(expr) ORT_RETURN_IF_ERROR(CURAND_CALL(expr)) #define CUDNN_RETURN_IF_ERROR(expr) ORT_RETURN_IF_ERROR(CUDNN_CALL(expr)) #define CUDNN2_RETURN_IF_ERROR(expr, m) ORT_RETURN_IF_ERROR(CUDNN_CALL2(expr, m)) #define CUFFT_RETURN_IF_ERROR(expr) ORT_RETURN_IF_ERROR(CUFFT_CALL(expr)) - +#endif // Type mapping for MLFloat16 to half template class ToCudaType { @@ -93,7 +94,7 @@ inline bool CalculateFdmStrides(gsl::span p, const std::vector KernelCreateInfo BuildKernelCreateInfo() { @@ -1326,6 +1332,7 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, // default entry to avoid the list become empty after ops-reducing BuildKernelCreateInfo, BuildKernelCreateInfo, +#ifndef USE_CUDA_MINIMAL BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2201,6 +2208,7 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, +#endif }; for (auto& function_table_entry : function_table) { @@ -2210,6 +2218,7 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) { } } +#ifndef USE_CUDA_MINIMAL #ifndef DISABLE_CONTRIB_OPS ORT_RETURN_IF_ERROR(::onnxruntime::contrib::cuda::RegisterCudaContribKernels(kernel_registry)); #endif @@ -2220,6 +2229,7 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) { #ifdef ENABLE_TRAINING_OPS ORT_RETURN_IF_ERROR(::onnxruntime::cuda::RegisterCudaTrainingKernels(kernel_registry)); +#endif #endif return Status::OK(); diff --git a/onnxruntime/core/providers/cuda/cuda_pch.h b/onnxruntime/core/providers/cuda/cuda_pch.h index f48554e8f1286..dfe50fe0a8832 100644 --- a/onnxruntime/core/providers/cuda/cuda_pch.h +++ b/onnxruntime/core/providers/cuda/cuda_pch.h @@ -10,12 +10,19 @@ #include #include +#include +#ifndef USE_CUDA_MINIMAL #include #include #include #include #include #include +#else +typedef void* cudnnHandle_t; +typedef void* cublasHandle_t; +typedef void* cublasLtHandle_t; +#endif #ifdef ORT_USE_NCCL #include diff --git a/onnxruntime/core/providers/cuda/cuda_stream_handle.cc 
b/onnxruntime/core/providers/cuda/cuda_stream_handle.cc index 7c866395ecf6e..0a256394b7d99 100644 --- a/onnxruntime/core/providers/cuda/cuda_stream_handle.cc +++ b/onnxruntime/core/providers/cuda/cuda_stream_handle.cc @@ -69,6 +69,7 @@ CudaStream::CudaStream(cudaStream_t stream, release_cpu_buffer_on_cuda_stream_(release_cpu_buffer_on_cuda_stream), deferred_cpu_allocator_(*this), ep_info_(ep_info) { +#ifndef USE_CUDA_MINIMAL if (own_flag) { CUBLAS_CALL_THROW(cublasCreate(&cublas_handle_)); CUBLAS_CALL_THROW(cublasSetStream(cublas_handle_, stream)); @@ -80,10 +81,12 @@ CudaStream::CudaStream(cudaStream_t stream, cudnn_handle_ = external_cudnn_handle; CUDNN_CALL_THROW(cudnnSetStream(cudnn_handle_, stream)); } +#endif } CudaStream::~CudaStream() { ORT_IGNORE_RETURN_VALUE(CleanUpOnRunEnd()); +#ifndef USE_CUDA_MINIMAL if (own_stream_) { cublasDestroy(cublas_handle_); cudnnDestroy(cudnn_handle_); @@ -91,6 +94,7 @@ CudaStream::~CudaStream() { if (handle) cudaStreamDestroy(static_cast<cudaStream_t>(handle)); } +#endif } std::unique_ptr<synchronize::Notification> CudaStream::CreateNotification(size_t /*num_consumers*/) { diff --git a/onnxruntime/core/providers/cuda/cudnn_common.cc b/onnxruntime/core/providers/cuda/cudnn_common.cc index 4df59a98b12e5..c850f7b583bfc 100644 --- a/onnxruntime/core/providers/cuda/cudnn_common.cc +++ b/onnxruntime/core/providers/cuda/cudnn_common.cc @@ -9,7 +9,7 @@ #include "core/common/gsl.h" #include "shared_inc/cuda_call.h" #include "core/providers/cpu/tensor/utils.h" - +#ifndef USE_CUDA_MINIMAL namespace onnxruntime { namespace cuda { @@ -222,3 +222,4 @@ const Float8E5M2 Consts<Float8E5M2>::One = Float8E5M2(1.0f, true); } // namespace cuda } // namespace onnxruntime +#endif diff --git a/onnxruntime/core/providers/cuda/cudnn_common.h b/onnxruntime/core/providers/cuda/cudnn_common.h index 8a94a334ee688..fdd14dedad47e 100644 --- a/onnxruntime/core/providers/cuda/cudnn_common.h +++ b/onnxruntime/core/providers/cuda/cudnn_common.h @@ -7,7 +7,7 @@ #include #include "core/providers/cuda/cuda_common.h" - +#ifndef USE_CUDA_MINIMAL namespace onnxruntime { namespace cuda { @@ -260,3 +260,4 @@ SetPoolingNdDescriptorHelper(cudnnPoolingDescriptor_t poolingDesc, } // namespace cuda } // namespace onnxruntime +#endif From 146ebaf91e85185a0ac18c82bc69eba685ab9727 Mon Sep 17 00:00:00 2001 From: Yulong Wang <7679871+fs-eire@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:03:43 -0800 Subject: [PATCH 13/15] [js/web] allow proxy to load model with 1GB <= size < 2GB (#19178) ### Description Allow proxy to load models with 1GB <= size < 2GB. Resolves #19157. 
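For scale, the 64 KiB page arithmetic behind the WebAssembly.Memory fallback works out as follows — a short sketch using a hypothetical 1.5 GB model size:

```python
import math

# Illustrative numbers for the loadFile() fallback below: when
# `new ArrayBuffer(fileSize)` raises a RangeError, the buffer is taken from
# a WebAssembly.Memory instead, which is sized in 64 KiB (65536-byte) pages.
file_size = 1_500_000_000             # hypothetical 1.5 GB model
pages = math.ceil(file_size / 65536)  # same computation as in the patch
print(pages)                          # 22889
assert pages * 65536 >= file_size     # the memory is large enough for the file
```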
--- js/web/lib/wasm/wasm-utils-load-file.ts | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/js/web/lib/wasm/wasm-utils-load-file.ts b/js/web/lib/wasm/wasm-utils-load-file.ts index abe480a43c790..c6cdba2320bde 100644 --- a/js/web/lib/wasm/wasm-utils-load-file.ts +++ b/js/web/lib/wasm/wasm-utils-load-file.ts @@ -47,9 +47,19 @@ export const loadFile = async(file: string|Blob|ArrayBufferLike|Uint8Array): Pro } const reader = response.body.getReader(); - // use WebAssembly Memory to allocate larger ArrayBuffer - const pages = Math.ceil(fileSize / 65536); - const buffer = new WebAssembly.Memory({initial: pages, maximum: pages}).buffer; + let buffer; + try { + // try to create ArrayBuffer directly + buffer = new ArrayBuffer(fileSize); + } catch (e) { + if (e instanceof RangeError) { + // use WebAssembly Memory to allocate larger ArrayBuffer + const pages = Math.ceil(fileSize / 65536); + buffer = new WebAssembly.Memory({initial: pages, maximum: pages}).buffer; + } else { + throw e; + } + } let offset = 0; // eslint-disable-next-line no-constant-condition From f87e69801f200a34ddb312f1d39e7296f19b660b Mon Sep 17 00:00:00 2001 From: Yulong Wang <7679871+fs-eire@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:04:22 -0800 Subject: [PATCH 14/15] [js/web] show warning when numThreads is set but threads is not supported (#19179) ### Description Show a warning when numThreads is set but threads are not supported. Resolves #19148 and #18933. For web: when crossOriginIsolated is false. For Node.js: always disabled. --- js/web/lib/backend-wasm.ts | 6 ++++++ js/web/lib/wasm/wasm-factory.ts | 33 +++++++++++++++++++++++++++------ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/js/web/lib/backend-wasm.ts b/js/web/lib/backend-wasm.ts index d9f63fec9c492..31ecffb07e40c 100644 --- a/js/web/lib/backend-wasm.ts +++ b/js/web/lib/backend-wasm.ts @@ -31,6 +31,12 @@ export const initializeFlags = (): void => { } if (typeof env.wasm.numThreads !== 'number' || !Number.isInteger(env.wasm.numThreads) || env.wasm.numThreads <= 0) { + // Web: when crossOriginIsolated is false, SharedArrayBuffer is not available so WebAssembly threads will not work. + // Node.js: onnxruntime-web does not support multi-threads in Node.js. + if ((typeof self !== 'undefined' && !self.crossOriginIsolated) || + (typeof process !== 'undefined' && process.versions && process.versions.node)) { + env.wasm.numThreads = 1; + } const numCpuLogicalCores = typeof navigator === 'undefined' ? cpus().length : navigator.hardwareConcurrency; env.wasm.numThreads = Math.min(4, Math.ceil((numCpuLogicalCores || 1) / 2)); } diff --git a/js/web/lib/wasm/wasm-factory.ts b/js/web/lib/wasm/wasm-factory.ts index 81508a253ce8b..9b9334c93b78c 100644 --- a/js/web/lib/wasm/wasm-factory.ts +++ b/js/web/lib/wasm/wasm-factory.ts @@ -28,13 +28,34 @@ let initialized = false; let initializing = false; let aborted = false; -const isMultiThreadSupported = (): boolean => { - try { - // If 'SharedArrayBuffer' is not available, WebAssembly threads will not work. - if (typeof SharedArrayBuffer === 'undefined') { - return false; +const isMultiThreadSupported = (numThreads: number): boolean => { + // numThreads is set to 1 (single thread), so multi-threading is not needed. + if (numThreads === 1) { + return false; + } + + // If 'SharedArrayBuffer' is not available, WebAssembly threads will not work. 
+ if (typeof SharedArrayBuffer === 'undefined') { + if (typeof self !== 'undefined' && !self.crossOriginIsolated) { + // eslint-disable-next-line no-console + console.warn( + 'env.wasm.numThreads is set to ' + numThreads + + ', but this will not work unless you enable crossOriginIsolated mode. ' + + 'See https://web.dev/cross-origin-isolation-guide/ for more info.'); } + return false; + } + + // onnxruntime-web does not support multi-threads in Node.js. + if (typeof process !== 'undefined' && process.versions && process.versions.node) { + // eslint-disable-next-line no-console + console.warn( + 'env.wasm.numThreads is set to ' + numThreads + + ', however, currently onnxruntime-web does not support multi-threads in Node.js. ' + + 'Please consider using onnxruntime-node for performance critical scenarios.'); + } + try { // Test for transferability of SABs (for browsers. needed for Firefox) // https://groups.google.com/forum/#!msg/mozilla.dev.platform/IHkBZlHETpA/dwsMNchWEQAJ if (typeof MessageChannel !== 'undefined') { @@ -106,7 +127,7 @@ export const initializeWebAssembly = async(flags: Env.WebAssemblyFlags): Promise const numThreads = flags.numThreads!; const simd = flags.simd!; - const useThreads = numThreads > 1 && isMultiThreadSupported(); + const useThreads = isMultiThreadSupported(numThreads); const useSimd = simd && isSimdSupported(); const wasmPaths = flags.wasmPaths; From 9da3e36138dd24377fbb0b4022d891b3baf07b84 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Wed, 17 Jan 2024 20:20:42 -0500 Subject: [PATCH 15/15] Fix buildJava from Zip-Nuget-Java-Nodejs Packaging Pipeline (#19187) ### Description ### Motivation and Context --- .../c-api-noopenmp-packaging-pipelines.yml | 2 ++ .../stages/nuget-linux-cuda-packaging-stage.yml | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index 3803333bd880a..aa1a75bfcda45 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -204,6 +204,8 @@ stages: CudaVersion: ${{ parameters.CudaVersion }} docker_base_image: ${{ variables.docker_base_image }} linux_trt_version: ${{ variables.linux_trt_version }} + buildJava: true + buildNodejs: true #CUDA without tensorrt - template: templates/win-ci.yml diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml index dbbc9ef27e513..db9bcacbf0754 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml @@ -6,6 +6,12 @@ parameters: type: string - name: linux_trt_version type: string +- name: buildJava + type: boolean + default: false +- name: buildNodejs + type: boolean + default: false stages: # Linux CUDA without TensorRT Packaging @@ -66,9 +72,9 @@ stages: parameters: artifactName: 'onnxruntime-linux-x64-tensorrt-$(OnnxRuntimeVersion)' artifactNameNoVersionString: 'onnxruntime-linux-x64-tensorrt' - buildJava: false + buildJava: ${{ parameters.buildJava }} buildJavaOption: '--build_java' - buildNodejs: false + buildNodejs: ${{ parameters.buildNodejs }} buildNodejsOption: '--build_nodejs' CudaVersion: ${{ parameters.CudaVersion }} # Linux CUDA Combined Testing and 
Publishing