diff --git a/dali/operators/generic/resize/tensor_resize.h b/dali/operators/generic/resize/tensor_resize.h index a7a4ad685d6..8d277195c70 100644 --- a/dali/operators/generic/resize/tensor_resize.h +++ b/dali/operators/generic/resize/tensor_resize.h @@ -23,6 +23,7 @@ #include "dali/core/expand_dims.h" #include "dali/operators/image/resize/resize_base.h" #include "dali/operators/image/resize/tensor_resize_attr.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/common.h" #include "dali/pipeline/operator/operator.h" @@ -31,7 +32,7 @@ namespace tensor_resize { template -class TensorResize : public Operator +class TensorResize : public StatelessOperator , protected ResizeBase { public: explicit TensorResize(const OpSpec &spec); @@ -166,7 +167,7 @@ class TensorResize : public Operator template TensorResize::TensorResize(const OpSpec &spec) - : Operator(spec), ResizeBase(spec), resize_attr_(spec) { + : StatelessOperator(spec), ResizeBase(spec), resize_attr_(spec) { InitializeBackend(); } diff --git a/dali/operators/geometry/affine_transforms/combine_transforms.cc b/dali/operators/geometry/affine_transforms/combine_transforms.cc index 528a23ff0c4..f7373ccd135 100644 --- a/dali/operators/geometry/affine_transforms/combine_transforms.cc +++ b/dali/operators/geometry/affine_transforms/combine_transforms.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,10 +20,11 @@ #include "dali/core/static_switch.h" #include "dali/kernels/kernel_manager.h" #include "dali/pipeline/data/types.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/op_spec.h" -#include "dali/pipeline/workspace/workspace.h" #include "dali/pipeline/operator/operator.h" #include "dali/pipeline/operator/sequence_operator.h" +#include "dali/pipeline/workspace/workspace.h" #define TRANSFORM_INPUT_TYPES (float) @@ -49,10 +50,11 @@ Example: combining [T1, T2, T3] is equivalent to T3(T2(T1(...))) for default ord .AllowSequences() .AddParent("TransformAttr"); -class CombineTransformsCPU : public SequenceOperator { +class CombineTransformsCPU : public SequenceOperator { public: + using Base = SequenceOperator; explicit CombineTransformsCPU(const OpSpec &spec) : - SequenceOperator(spec), + Base(spec), reverse_order_(spec.GetArgument("reverse_order")) { } diff --git a/dali/operators/geometry/coord_transform.h b/dali/operators/geometry/coord_transform.h index 3fd734979fa..8a419f400c9 100644 --- a/dali/operators/geometry/coord_transform.h +++ b/dali/operators/geometry/coord_transform.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include "dali/core/static_switch.h" #include "dali/kernels/kernel_manager.h" #include "dali/operators/geometry/mt_transform_attr.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/operator.h" #include "dali/pipeline/operator/sequence_operator.h" @@ -30,9 +31,10 @@ namespace dali { #define COORD_TRANSFORM_DIMS (1, 2, 3, 4, 5, 6) template -class CoordTransform : public SequenceOperator, private MTTransformAttr { +class CoordTransform : public SequenceOperator, + private MTTransformAttr { public: - using Base = SequenceOperator; + using Base = SequenceOperator; explicit CoordTransform(const OpSpec &spec) : Base(spec), MTTransformAttr(spec) { dtype_ = spec_.template GetArgument("dtype"); } diff --git a/dali/operators/image/color/brightness_contrast.h b/dali/operators/image/color/brightness_contrast.h index 5a4601ad08f..ceb5efe5bd6 100644 --- a/dali/operators/image/color/brightness_contrast.h +++ b/dali/operators/image/color/brightness_contrast.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include "dali/core/static_switch.h" #include "dali/kernels/kernel_manager.h" #include "dali/pipeline/data/views.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/common.h" #include "dali/pipeline/operator/operator.h" #include "dali/pipeline/operator/sequence_operator.h" @@ -54,17 +55,16 @@ const float kDefaultBrightnessShift = 0; const float kDefaultContrast = 1.f; template -class BrightnessContrastOp : public SequenceOperator { +class BrightnessContrastOp : public SequenceOperator { public: + using Base = SequenceOperator; ~BrightnessContrastOp() override = default; DISABLE_COPY_MOVE_ASSIGN(BrightnessContrastOp); protected: explicit BrightnessContrastOp(const OpSpec &spec) - : SequenceOperator(spec), - output_type_(DALI_NO_TYPE), - input_type_(DALI_NO_TYPE) { + : Base(spec), output_type_(DALI_NO_TYPE), input_type_(DALI_NO_TYPE) { spec.TryGetArgument(output_type_arg_, "dtype"); } @@ -76,7 +76,7 @@ class BrightnessContrastOp : public SequenceOperator { // the number of samples and parameters unnecessarily for FHWC when there are no // per-frame parameters provided. bool ShouldExpand(const Workspace &ws) override { - return SequenceOperator::ShouldExpand(ws) && this->HasPerFrameArgInputs(ws); + return Base::ShouldExpand(ws) && this->HasPerFrameArgInputs(ws); } template @@ -163,7 +163,7 @@ class BrightnessContrastCpu : public BrightnessContrastOp { * "overloaded virtual function `dali::Operator::RunImpl` is only partially * overridden in class `dali::brightness_contrast::BrightnessContrast`" */ - using SequenceOperator::RunImpl; + using Base::RunImpl; ~BrightnessContrastCpu() override = default; diff --git a/dali/operators/image/color/debayer.h b/dali/operators/image/color/debayer.h index 87ff8de8c41..365b1c94152 100644 --- a/dali/operators/image/color/debayer.h +++ b/dali/operators/image/color/debayer.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include "dali/core/span.h" #include "dali/kernels/imgproc/color_manipulation/debayer/debayer.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/common.h" #include "dali/pipeline/operator/op_spec.h" #include "dali/pipeline/operator/operator.h" @@ -99,10 +100,11 @@ class DebayerImplBase { template -class Debayer : public SequenceOperator { +class Debayer : public SequenceOperator { public: + using Base = SequenceOperator; explicit Debayer(const OpSpec &spec) - : SequenceOperator(spec), + : Base(spec), alg_{debayer::parse_algorithm_name(spec.GetArgument(debayer::kAlgArgName))} { if (!spec_.HasTensorArgument(debayer::kBluePosArgName)) { std::vector blue_pos; diff --git a/dali/operators/image/convolution/filter.h b/dali/operators/image/convolution/filter.h index 9c670efe631..744c45a0fc3 100644 --- a/dali/operators/image/convolution/filter.h +++ b/dali/operators/image/convolution/filter.h @@ -22,6 +22,7 @@ #include "dali/core/boundary.h" #include "dali/core/common.h" #include "dali/core/static_switch.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/common.h" #include "dali/pipeline/operator/operator.h" #include "dali/pipeline/operator/sequence_operator.h" @@ -169,10 +170,11 @@ TensorListView, const In, 0> get_fill_values_ } // namespace filter template -class Filter : public SequenceOperator { +class Filter : public SequenceOperator { public: + using Base = SequenceOperator; inline explicit Filter(const OpSpec& spec) - : SequenceOperator(spec), + : Base(spec), is_valid_mode_{filter::parse_is_valid_mode(spec.GetArgument("mode"))} { spec.TryGetArgument(dtype_, "dtype"); } @@ -197,8 +199,7 @@ class Filter : public SequenceOperator { // when there are no per-frame arguments, to reduce the number of instances of // per-sample data-structure when they are not needed. bool should_expand = - SequenceOperator::ShouldExpand(ws) && - (HasPerFramePositionalArgs(ws) || SequenceOperator::HasPerFrameArgInputs(ws)); + Base::ShouldExpand(ws) && (HasPerFramePositionalArgs(ws) || Base::HasPerFrameArgInputs(ws)); if (should_expand && input_layout.size() && input_layout[0] == 'F') { assert(input_desc_.num_seq_dims >= 1); input_desc_.num_seq_dims--; diff --git a/dali/operators/image/convolution/gaussian_blur.cc b/dali/operators/image/convolution/gaussian_blur.cc index 0a7faa0ca24..f77aa70d3fe 100644 --- a/dali/operators/image/convolution/gaussian_blur.cc +++ b/dali/operators/image/convolution/gaussian_blur.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -153,7 +153,7 @@ bool GaussianBlur::ShouldExpand(const Workspace &ws) { const auto& input = ws.Input(0); auto layout = input.GetLayout(); dim_desc_ = convolution_utils::ParseAndValidateDim(input.shape().sample_dim(), layout); - bool should_expand = SequenceOperator::ShouldExpand(ws); + bool should_expand = Base::ShouldExpand(ws); if (should_expand) { assert(dim_desc_.usable_axes_start > 0); dim_desc_.total_axes_count -= dim_desc_.usable_axes_start; diff --git a/dali/operators/image/convolution/gaussian_blur.cu b/dali/operators/image/convolution/gaussian_blur.cu index b7f70e57e21..1bbb8174fac 100644 --- a/dali/operators/image/convolution/gaussian_blur.cu +++ b/dali/operators/image/convolution/gaussian_blur.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -77,7 +77,7 @@ bool GaussianBlur::ShouldExpand(const Workspace &ws) { const auto& input = ws.Input(0); auto layout = input.GetLayout(); dim_desc_ = convolution_utils::ParseAndValidateDim(input.shape().sample_dim(), layout); - bool should_expand = SequenceOperator::ShouldExpand(ws) && HasPerFrameArgInputs(ws); + bool should_expand = Base::ShouldExpand(ws) && HasPerFrameArgInputs(ws); if (should_expand) { assert(dim_desc_.usable_axes_start > 0); dim_desc_.total_axes_count -= dim_desc_.usable_axes_start; diff --git a/dali/operators/image/convolution/gaussian_blur.h b/dali/operators/image/convolution/gaussian_blur.h index 7977a4f5084..f54aa15f894 100644 --- a/dali/operators/image/convolution/gaussian_blur.h +++ b/dali/operators/image/convolution/gaussian_blur.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include "dali/operators/image/convolution/convolution_utils.h" #include "dali/operators/image/convolution/gaussian_blur_params.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/common.h" #include "dali/pipeline/operator/operator.h" #include "dali/pipeline/operator/sequence_operator.h" @@ -38,10 +39,11 @@ namespace dali { #define GAUSSIAN_BLUR_SUPPORTED_AXES (1, 2, 3) template -class GaussianBlur : public SequenceOperator { +class GaussianBlur : public SequenceOperator { public: + using Base = SequenceOperator; inline explicit GaussianBlur(const OpSpec& spec) - : SequenceOperator(spec) { + : Base(spec) { spec.TryGetArgument(dtype_, "dtype"); } diff --git a/dali/operators/image/convolution/laplacian.cc b/dali/operators/image/convolution/laplacian.cc index 333dea5b2d2..3bd960c52a0 100644 --- a/dali/operators/image/convolution/laplacian.cc +++ b/dali/operators/image/convolution/laplacian.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -191,7 +191,7 @@ bool Laplacian::ShouldExpand(const Workspace &ws) { const auto& input = ws.Input(0); auto layout = input.GetLayout(); dim_desc_ = convolution_utils::ParseAndValidateDim(input.shape().sample_dim(), layout); - bool should_expand = SequenceOperator::ShouldExpand(ws); + bool should_expand = Base::ShouldExpand(ws); if (should_expand) { assert(dim_desc_.usable_axes_start > 0); dim_desc_.total_axes_count -= dim_desc_.usable_axes_start; diff --git a/dali/operators/image/convolution/laplacian.cu b/dali/operators/image/convolution/laplacian.cu index ce23cd7e631..0d75671519f 100644 --- a/dali/operators/image/convolution/laplacian.cu +++ b/dali/operators/image/convolution/laplacian.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -76,7 +76,7 @@ bool Laplacian::ShouldExpand(const Workspace &ws) { const auto& input = ws.Input(0); auto layout = input.GetLayout(); dim_desc_ = convolution_utils::ParseAndValidateDim(input.shape().sample_dim(), layout); - bool should_expand = SequenceOperator::ShouldExpand(ws) && HasPerFrameArgInputs(ws); + bool should_expand = Base::ShouldExpand(ws) && HasPerFrameArgInputs(ws); if (should_expand) { assert(dim_desc_.usable_axes_start > 0); dim_desc_.total_axes_count -= dim_desc_.usable_axes_start; diff --git a/dali/operators/image/convolution/laplacian.h b/dali/operators/image/convolution/laplacian.h index 3df1dc7a635..02300315644 100644 --- a/dali/operators/image/convolution/laplacian.h +++ b/dali/operators/image/convolution/laplacian.h @@ -1,4 +1,4 @@ -// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include "dali/operators/image/convolution/convolution_utils.h" #include "dali/operators/image/convolution/laplacian_params.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/common.h" #include "dali/pipeline/operator/operator.h" #include "dali/pipeline/operator/sequence_operator.h" @@ -39,10 +40,11 @@ namespace dali { template -class Laplacian : public SequenceOperator { +class Laplacian : public SequenceOperator { public: + using Base = SequenceOperator; inline explicit Laplacian(const OpSpec& spec) - : SequenceOperator(spec) { + : Base(spec) { spec.TryGetArgument(dtype_, "dtype"); } diff --git a/dali/operators/image/remap/displacement_filter.h b/dali/operators/image/remap/displacement_filter.h index 9ce77bb0b31..4a3d16c2303 100644 --- a/dali/operators/image/remap/displacement_filter.h +++ b/dali/operators/image/remap/displacement_filter.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -35,6 +35,8 @@ struct HasParam : std::true_type class DisplacementIdentity { public: + // helper flag for checkpointing to select proper base class + static constexpr bool is_stateless = true; explicit DisplacementIdentity(const OpSpec& spec) {} DALI_HOST_DEV @@ -47,10 +49,14 @@ class DisplacementIdentity { void Cleanup() {} }; +template +using DisplacementBase = + std::conditional_t, Operator>; + template -class DisplacementFilter : public StatelessOperator {}; +class DisplacementFilter : public DisplacementBase {}; } // namespace dali diff --git a/dali/operators/image/remap/displacement_filter_impl_cpu.h b/dali/operators/image/remap/displacement_filter_impl_cpu.h index a3c74a0fe2f..0d9b8efce9d 100644 --- a/dali/operators/image/remap/displacement_filter_impl_cpu.h +++ b/dali/operators/image/remap/displacement_filter_impl_cpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -64,10 +64,10 @@ void Warp( template class DisplacementFilter - : public Operator { + : public DisplacementBase { public: explicit DisplacementFilter(const OpSpec &spec) - : Operator(spec), + : DisplacementBase(spec), displace_(num_threads_, Displacement(spec)), interp_type_(spec.GetArgument("interp_type")) { has_mask_ = spec.HasTensorArgument("mask"); diff --git a/dali/operators/image/remap/displacement_filter_impl_gpu.cuh b/dali/operators/image/remap/displacement_filter_impl_gpu.cuh index cfa5ab7aada..35004e2d7c4 100644 --- a/dali/operators/image/remap/displacement_filter_impl_gpu.cuh +++ b/dali/operators/image/remap/displacement_filter_impl_gpu.cuh @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -214,13 +214,12 @@ void DisplacementKernel_aligned32bit( } } -template -class DisplacementFilter : public Operator { +template +class DisplacementFilter + : public DisplacementBase { public: explicit DisplacementFilter(const OpSpec &spec) : - Operator(spec), + DisplacementBase(spec), displace_(spec), interp_type_(spec.GetArgument("interp_type")) { channel_block_setup_.SetBlockDim(ivec3{kAlignedBlockDim, 1, 1}); diff --git a/dali/operators/image/remap/jitter.cuh b/dali/operators/image/remap/jitter.cuh index 414855799d0..45bb963f8ba 100644 --- a/dali/operators/image/remap/jitter.cuh +++ b/dali/operators/image/remap/jitter.cuh @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -34,6 +34,7 @@ class JitterAugment {}; template <> class JitterAugment { public: + static constexpr bool is_stateless = false; explicit JitterAugment(const OpSpec& spec) : rnd_(spec.GetArgument("seed"), rnd_size_), nDegree_(spec.GetArgument("nDegree")) { diff --git a/dali/operators/image/remap/remap.h b/dali/operators/image/remap/remap.h index 9193440c2e8..7a83336c0ca 100644 --- a/dali/operators/image/remap/remap.h +++ b/dali/operators/image/remap/remap.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,12 +15,13 @@ #ifndef DALI_OPERATORS_IMAGE_REMAP_REMAP_H_ #define DALI_OPERATORS_IMAGE_REMAP_REMAP_H_ -#include #include +#include #include "dali/core/cuda_stream_pool.h" #include "dali/kernels/imgproc/geom/remap.h" #include "dali/kernels/imgproc/geom/remap_npp.h" #include "dali/pipeline/data/views.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/common.h" #include "dali/pipeline/operator/operator.h" #include "dali/pipeline/operator/sequence_operator.h" @@ -34,10 +35,11 @@ namespace remap { #define REMAP_SUPPORTED_TYPES (uint8_t, int16_t, uint16_t, float) -template -class Remap : public SequenceOperator { +template +class Remap : public SequenceOperator { public: - explicit Remap(const OpSpec &spec) : SequenceOperator(spec) {} + using Base = SequenceOperator; + explicit Remap(const OpSpec &spec) : Base(spec) {} ~Remap() override = default; diff --git a/dali/operators/image/remap/sphere.h b/dali/operators/image/remap/sphere.h index a5d25331cd9..2e88ac09dff 100644 --- a/dali/operators/image/remap/sphere.h +++ b/dali/operators/image/remap/sphere.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ namespace dali { class SphereAugment { public: + static constexpr bool is_stateless = true; explicit SphereAugment(const OpSpec& spec) {} DALI_HOST_DEV diff --git a/dali/operators/image/remap/water.h b/dali/operators/image/remap/water.h index 749d4e9462c..b81431f9dc2 100644 --- a/dali/operators/image/remap/water.h +++ b/dali/operators/image/remap/water.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ namespace dali { class WaterAugment { public: + static constexpr bool is_stateless = true; class WaveDescr { public: WaveDescr(const OpSpec &spec, const char *direction) diff --git a/dali/operators/imgcodec/decoder.h b/dali/operators/imgcodec/decoder.h index fe1f1fa6d20..33ef896ace1 100644 --- a/dali/operators/imgcodec/decoder.h +++ b/dali/operators/imgcodec/decoder.h @@ -16,14 +16,15 @@ #include #include -#include "dali/pipeline/operator/common.h" -#include "dali/pipeline/operator/operator.h" -#include "dali/operators/imgcodec/operator_utils.h" -#include "dali/operators/image/crop/crop_attr.h" -#include "dali/operators/generic/slice/slice_attr.h" -#include "dali/operators/image/crop/random_crop_attr.h" #include "dali/imgcodec/image_decoder_interfaces.h" #include "dali/imgcodec/util/output_shape.h" +#include "dali/operators/generic/slice/slice_attr.h" +#include "dali/operators/image/crop/crop_attr.h" +#include "dali/operators/image/crop/random_crop_attr.h" +#include "dali/operators/imgcodec/operator_utils.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" +#include "dali/pipeline/operator/common.h" +#include "dali/pipeline/operator/operator.h" #ifndef DALI_OPERATORS_IMGCODEC_DECODER_H_ #define DALI_OPERATORS_IMGCODEC_DECODER_H_ @@ -32,12 +33,12 @@ namespace dali { namespace imgcodec { template -class DecoderBase : public Operator { +class DecoderBase : public StatelessOperator { public: ~DecoderBase() override = default; protected: - explicit DecoderBase(const OpSpec &spec) : Operator(spec) { + explicit DecoderBase(const OpSpec &spec) : StatelessOperator(spec) { device_id_ = spec.GetArgument("device_id"); opts_.format = spec.GetArgument("output_type"); opts_.dtype = spec.GetArgument("dtype"); diff --git a/dali/pipeline/operator/builtin/conditional/logical_not.h b/dali/pipeline/operator/builtin/conditional/logical_not.h index 3a20ce8db55..8092190b162 100644 --- a/dali/pipeline/operator/builtin/conditional/logical_not.h +++ b/dali/pipeline/operator/builtin/conditional/logical_not.h @@ -19,16 +19,18 @@ #include #include "dali/pipeline/operator/builtin/conditional/validation.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/operator.h" + namespace dali { /** * @brief Eager `not` operator from Python */ -class LogicalNot : public Operator { +class LogicalNot : public StatelessOperator { public: - explicit LogicalNot(const OpSpec &spec) : Operator(spec), name_("not") {} + explicit LogicalNot(const OpSpec &spec) : StatelessOperator(spec), name_("not") {} ~LogicalNot() override = default; diff --git a/dali/pipeline/operator/builtin/conditional/merge.h b/dali/pipeline/operator/builtin/conditional/merge.h index 9f04f573fc1..e00ba016a84 100644 --- a/dali/pipeline/operator/builtin/conditional/merge.h +++ b/dali/pipeline/operator/builtin/conditional/merge.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,13 +20,15 @@ #include "dali/core/access_order.h" #include "dali/core/common.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/operator.h" + namespace dali { template -class Merge : public Operator { +class Merge : public StatelessOperator { public: - explicit Merge(const OpSpec &spec) : Operator(spec) { + explicit Merge(const OpSpec &spec) : StatelessOperator(spec) { DALI_ENFORCE(spec.HasTensorArgument("predicate"), "The 'predicate' argument is required to be present as argument input."); RegisterTestsDiagnostics(); diff --git a/dali/pipeline/operator/builtin/conditional/split.h b/dali/pipeline/operator/builtin/conditional/split.h index 4be69c13712..463d6a2729b 100644 --- a/dali/pipeline/operator/builtin/conditional/split.h +++ b/dali/pipeline/operator/builtin/conditional/split.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,15 +17,18 @@ #include +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/operator.h" + namespace dali { template -class Split : public Operator { +class Split : public StatelessOperator { public: explicit Split(const OpSpec &spec) - : Operator(spec), if_stmt_implementation_(spec.GetArgument("_if_stmt")) { + : StatelessOperator(spec), + if_stmt_implementation_(spec.GetArgument("_if_stmt")) { DALI_ENFORCE(spec.HasTensorArgument("predicate"), "The 'predicate' argument is required to be present as argument input."); RegisterTestsDiagnostics(); diff --git a/dali/pipeline/operator/builtin/conditional/validate_logical_expr.h b/dali/pipeline/operator/builtin/conditional/validate_logical_expr.h index eaa531f79fa..571431c6109 100644 --- a/dali/pipeline/operator/builtin/conditional/validate_logical_expr.h +++ b/dali/pipeline/operator/builtin/conditional/validate_logical_expr.h @@ -19,6 +19,7 @@ #include #include "dali/pipeline/operator/builtin/conditional/validation.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/pipeline/operator/operator.h" namespace dali { @@ -28,10 +29,10 @@ namespace dali { * and `and`. The inputs are restricted to scalars, it passes them through, but copy should also * be a similarly valid option. */ -class LogicalValidate : public Operator { +class LogicalValidate : public StatelessOperator { public: explicit LogicalValidate(const OpSpec &spec) - : Operator(spec), + : StatelessOperator(spec), name_(spec.GetArgument("expression_name")), side_(spec.GetArgument("expression_side")) {} @@ -57,10 +58,10 @@ class LogicalValidate : public Operator { * @brief This is just a placeholder operator that is picked when GPU inputs are encountered * and reports a better error. */ -class LogicalFailForGpu : public Operator { +class LogicalFailForGpu : public StatelessOperator { public: explicit LogicalFailForGpu(const OpSpec &spec) - : Operator(spec), + : StatelessOperator(spec), name_(spec.GetArgument("expression_name")), side_(spec.GetArgument("expression_side")) { ReportGpuInputError(name_, side_, true); diff --git a/dali/pipeline/operator/builtin/copy.h b/dali/pipeline/operator/builtin/copy.h index 8f5dd563dae..ebf04a8ded9 100644 --- a/dali/pipeline/operator/builtin/copy.h +++ b/dali/pipeline/operator/builtin/copy.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,15 +20,16 @@ #include #include "dali/pipeline/operator/operator.h" +#include "dali/pipeline/operator/checkpointing/stateless_operator.h" #include "dali/kernels/common/scatter_gather.h" namespace dali { template -class Copy : public Operator { +class Copy : public StatelessOperator { public: inline explicit Copy(const OpSpec &spec) : - Operator(spec), scatter_gather_(kMaxSizePerBlock) {} + StatelessOperator(spec), scatter_gather_(kMaxSizePerBlock) {} inline ~Copy() override = default; diff --git a/dali/test/python/checkpointing/test_dali_checkpointing.py b/dali/test/python/checkpointing/test_dali_checkpointing.py index deffa122f73..1b748bf2ca8 100644 --- a/dali/test/python/checkpointing/test_dali_checkpointing.py +++ b/dali/test/python/checkpointing/test_dali_checkpointing.py @@ -16,18 +16,31 @@ import nvidia.dali.fn as fn import nvidia.dali.types as types import os +import re import shutil import webdataset_base import numpy as np from nvidia.dali.pipeline import pipeline_def -from test_utils import get_dali_extra_path, compare_pipelines +from test_utils import ( + compare_pipelines, + create_sign_off_decorator, + get_dali_extra_path, + module_functions, +) from nose_utils import assert_warns from nose2.tools import params, cartesian_params from nose.plugins.attrib import attr from dataclasses import dataclass from nvidia.dali import tfrecord as tfrec +from nvidia.dali.auto_aug import auto_augment as aa +from nvidia.dali.auto_aug import rand_augment as ra +from nvidia.dali.auto_aug import trivial_augment as ta from reader.test_numpy import is_gds_supported + +reader_signed_off = create_sign_off_decorator() +random_signed_off = create_sign_off_decorator() + data_root = get_dali_extra_path() images_dir = os.path.join(data_root, "db", "single", "jpeg") @@ -200,6 +213,7 @@ def pipeline(): (7, 4, 2, 5, True, False, False, False, 3, 2), (0, 32, 3, 4, True, False, False, False, 0, 3), ) +@reader_signed_off("readers.file", "file_reader") def test_file_reader( num_epochs, batch_size, @@ -235,6 +249,7 @@ def test_file_reader( (16, 6, 3, 5, False, False, True, False, 2), (6, 7, 2, 3, False, True, False, True, 3), ) +@reader_signed_off("readers.coco", "coco_reader") def test_coco_reader( num_epochs, batch_size, @@ -280,6 +295,7 @@ def test_coco_reader( (5, 6, 2, 3, False, False, True, None), (3, 8, 4, 5, False, False, False, 1), ) +@reader_signed_off("readers.mxnet", "mxnet_reader") def test_mxnet_reader( num_epochs, batch_size, @@ -319,6 +335,7 @@ def test_mxnet_reader( (10, 7, 2, 3, False, False, True, None), (2, 8, 4, 5, False, False, False, 1), ) +@reader_signed_off("readers.tfrecord", "tfrecord_reader") def test_tfrecord_reader( num_epochs, batch_size, @@ -365,6 +382,7 @@ def tfrecord_wrapper(*args, **kwargs): (5, 1, 2, 3, True, True, False, 3), (0, 2, 3, 6, True, True, True, None), ) +@reader_signed_off("readers.sequence", "sequence_reader") def test_sequence_reader( num_epochs, batch_size, @@ -402,6 +420,7 @@ def test_sequence_reader( (0, 3, 3, 4, False, False, True, None), (1, 4, 2, 3, False, False, False, 3), ) +@reader_signed_off("readers.caffe", "caffe_reader") def test_caffe_reader( num_epochs, batch_size, @@ -440,6 +459,7 @@ def test_caffe_reader( (0, 2, 4, 5, False, False, True, None), (3, 3, 1, 3, False, False, False, 2), ) +@reader_signed_off("readers.caffe2", "caffe2_reader") def test_caffe2_reader( num_epochs, batch_size, @@ -478,6 +498,7 @@ def test_caffe2_reader( (6, 64, 4, 6, True, True, False, 5), (10, 128, 3, 4, True, True, True, None), ) +@reader_signed_off("readers.webdataset") def test_webdataset_reader( num_epochs, batch_size, @@ -527,6 +548,7 @@ def test_webdataset_reader( (9, 1, 0, 1, True, False, True, False, 3), (10, 2, 0, 2, True, False, True, True, 4), ) +@reader_signed_off("readers.nemo_asr", "nemo_asr_reader") def test_nemo_asr_reader( num_epochs, batch_size, @@ -602,6 +624,7 @@ def test_nemo_asr_reader( else [] ), ) +@reader_signed_off("readers.numpy", "numpy_reader") def test_numpy_reader( device, num_epochs, @@ -789,6 +812,7 @@ class VideoConfig: VideoConfig(sequence_length=3, stride=1, step=5), ), ) +@reader_signed_off("readers.video", "video_reader") def test_video_reader( num_epochs, batch_size, iters_into_epoch, config: BaseDecoderConfig, video: VideoConfig ): @@ -823,6 +847,68 @@ def test_video_reader( ) +# simplified case of test_video_reader suite +@cartesian_params( + (2,), + (1, 3), + (0, 3), + ( + BaseDecoderConfig( + shard_id=0, num_shards=1, stick_to_shard=True, pad_last_batch=True, random_shuffle=True + ), + BaseDecoderConfig( + shard_id=6, + num_shards=7, + stick_to_shard=False, + pad_last_batch=False, + random_shuffle=False, + ), + BaseDecoderConfig( + shard_id=0, + num_shards=2, + stick_to_shard=False, + pad_last_batch=False, + random_shuffle=True, + ), + ), + (VideoConfig(sequence_length=3, stride=1, step=-1),), +) +@reader_signed_off("readers.video_resize", "video_reader_resize") +def test_video_reader_resize_reader( + num_epochs, batch_size, iters_into_epoch, config: BaseDecoderConfig, video: VideoConfig +): + files = [ + os.path.join(get_dali_extra_path(), f"db/video/multiple_framerate/{f}/{f}fps.mp4") + for f in (10, 50) + ] + + check_reader_checkpointing( + fn.readers.video_resize, + num_epochs, + batch_size, + iters_into_epoch, + device="gpu", + filenames=files, + labels=list(range(len(files))), + normalized=True, + random_shuffle=config.random_shuffle, + image_type=types.RGB, + dtype=types.FLOAT, + enable_frame_num=True, + enable_timestamps=True, + file_list_frame_num=True, + file_list_include_preceding_frame=False, + num_shards=config.num_shards, + shard_id=config.shard_id, + stick_to_shard=config.stick_to_shard, + pad_last_batch=config.pad_last_batch, + sequence_length=video.sequence_length, + stride=video.stride, + step=video.step, + size=(100, 100), + ) + + @cartesian_params( ( "cpu", @@ -845,6 +931,7 @@ def test_video_reader( ), (VideoConfig(sequence_length=3, stride=1, step=5),), ) +@reader_signed_off("experimental.readers.video") def test_experimental_video_reader( device, num_epochs, batch_size, iters_into_epoch, config: BaseDecoderConfig, video: VideoConfig ): @@ -877,6 +964,7 @@ def test_experimental_video_reader( @cartesian_params(("cpu", "gpu"), (None, (1,), (10,))) +@random_signed_off("random.coin_flip", "coin_flip") def test_random_coin_flip(device, shape): check_no_input_operator(fn.random.coin_flip, device, shape=shape) @@ -888,6 +976,7 @@ def test_random_coin_flip_pytorch(device, shape): @cartesian_params(("cpu",), (None, (1,), (10,))) +@random_signed_off("random.normal", "normal_distribution") def test_random_normal(device, shape): check_no_input_operator(fn.random.normal, device, shape=shape) @@ -899,6 +988,7 @@ def test_random_normal_pytorch(device, shape): @cartesian_params(("cpu", "gpu"), (None, (1,), (10,))) +@random_signed_off("random.uniform", "uniform") def test_random_uniform(device, shape): check_no_input_operator(fn.random.uniform, device, shape=shape) @@ -909,20 +999,24 @@ def test_random_uniform_pytorch(device, shape): check_no_input_operator_pytorch(fn.random.uniform, device, shape=shape) +@random_signed_off("segmentation.random_object_bbox") def test_random_object_bbox(): check_single_input_operator(fn.segmentation.random_object_bbox, "cpu", format="box") +@random_signed_off("segmentation.random_mask_pixel") def test_random_mask_pixel(): check_single_input_operator(fn.segmentation.random_mask_pixel, "cpu") +@random_signed_off("roi_random_crop") def test_roi_random_crop(): check_single_input_operator( fn.roi_random_crop, "cpu", crop_shape=(10, 10), roi_start=(0, 0), roi_end=(30, 30) ) +@random_signed_off("ssd_random_crop") def test_ssd_random_crop(): @pipeline_def def pipeline(): @@ -934,14 +1028,17 @@ def pipeline(): check_pipeline_checkpointing_native(pipeline) +@random_signed_off("batch_permutation") def test_batch_permutation(): check_no_input_operator(fn.batch_permutation, "cpu") +@random_signed_off("jitter") def test_jitter(): check_single_input_operator(fn.jitter, "gpu") +@random_signed_off("random_bbox_crop") def test_random_bbox_crop(): def wrapper(input, **kwargs): bboxes = fn.cast(input[:, :4, 0], dtype=types.DALIDataType.FLOAT) @@ -952,77 +1049,34 @@ def wrapper(input, **kwargs): check_single_input_operator(wrapper, "cpu") -# Stateless operators section - - -@params("cpu", "gpu") -def test_rotate_checkpointing(device): - check_single_input_operator(fn.rotate, device, angle=15) - - -@params("cpu", "gpu") -def test_resize_checkpointing(device): - check_single_input_operator(fn.resize, device, resize_x=20, resize_y=10) - - -@params("cpu", "gpu") -def test_flip_checkpointing(device): - check_single_input_operator(fn.flip, device) - - -@params("cpu", "gpu") -def test_crop_mirror_normalize_checkpointing(device): - check_single_input_operator(fn.crop_mirror_normalize, device) - - -@params("cpu", "gpu") -def test_warp_affine_checkpointing(device): - check_single_input_operator(fn.warp_affine, device, matrix=(0.3, 0.7, 5, 0.7, 0.3, -5)) - - -@params("cpu", "gpu") -def test_saturation_checkpointing(device): - check_single_input_operator(fn.saturation, device) - - @params("cpu", "gpu") -def test_reductions_min_checkpointing(device): - check_single_input_operator(fn.reductions.min, device) +@random_signed_off("noise.gaussian") +def test_noise_gaussian(device): + check_single_input_operator(fn.noise.gaussian, device, stddev=150) @params("cpu", "gpu") -def test_reductions_max_checkpointing(device): - check_single_input_operator(fn.reductions.max, device) +@random_signed_off("noise.salt_and_pepper") +def test_noise_salt_and_pepper(device): + check_single_input_operator(fn.noise.salt_and_pepper, device, prob=0.5) @params("cpu", "gpu") -def test_reductions_sum_checkpointing(device): - check_single_input_operator(fn.reductions.sum, device, dtype=types.DALIDataType.UINT8) +@random_signed_off("noise.shot") +def test_noise_shot(device): + check_single_input_operator(fn.noise.shot, device, factor=100) -@params("cpu", "gpu") -def test_equalize_checkpointing(device): - check_single_input_operator(fn.experimental.equalize, device) - - -def test_transforms_crop_checkpointing(): - check_no_input_operator(fn.transforms.crop, "cpu") - - -def test_transforms_rotation_checkpointing(): - check_no_input_operator(fn.transforms.rotation, "cpu", angle=90) - - -def test_transforms_shear_checkpointing(): - check_no_input_operator(fn.transforms.shear, "cpu", shear=(2, 2)) - - -def test_transforms_scale_checkpointing(): - check_no_input_operator(fn.transforms.scale, "cpu", scale=(2, 4)) - +@params("cpu", "mixed") +@random_signed_off("image_decoder_random_crop", "decoders.image_random_crop") +def test_image_random_crop(device): + @pipeline_def + def pipeline(): + data, _ = fn.readers.file(name="Reader", file_root=images_dir) + image = fn.decoders.image_random_crop(data, device=device) + return image -def test_transforms_translation_checkpointing(): - check_no_input_operator(fn.transforms.translation, "cpu", offset=(21, 30)) + check_pipeline_checkpointing_native(pipeline) # External source @@ -1140,6 +1194,7 @@ def src(idx): ("idx", "batch_info", "sample_info"), # indexing mode (True, False), # parallel ) +@reader_signed_off("external_source") def test_external_source_checkpointing(dataset_info, iterations, mode, parallel): epoch_size, batch_size = dataset_info source = make_dummy_source(epoch_size, batch_size, mode) @@ -1181,3 +1236,82 @@ def pipeline(): with assert_warns(glob="DALI doesn't capture state of such 'source'."): pipeline().build() + + +# Auto augmentation tests - run auto augmentations as a good example of pipeline +# consisting of many ops + + +@params("cpu", "gpu") +def test_auto_augment(device): + @pipeline_def(enable_conditionals=True) + def pipeline(): + data, _ = fn.readers.file(name="Reader", file_root=images_dir) + image = fn.decoders.image(data, device="cpu" if device == "cpu" else "mixed") + return aa.auto_augment(image) + + check_pipeline_checkpointing_native(pipeline) + + +@params("cpu", "gpu") +def test_rand_augment(device): + @pipeline_def(enable_conditionals=True) + def pipeline(): + data, _ = fn.readers.file(name="Reader", file_root=images_dir) + image = fn.decoders.image(data, device="cpu" if device == "cpu" else "mixed") + return ra.rand_augment(image, n=2, m=15) + + check_pipeline_checkpointing_native(pipeline) + + +@params("cpu", "gpu") +def test_trivial_augment(device): + @pipeline_def(enable_conditionals=True) + def pipeline(): + data, _ = fn.readers.file(name="Reader", file_root=images_dir) + image = fn.decoders.image(data, device="cpu" if device == "cpu" else "mixed") + return ta.trivial_augment_wide(image) + + check_pipeline_checkpointing_native(pipeline) + + +unsupported_readers = [ + "experimental.readers.fits", +] + +unsupported_ops = [ + "experimental.decoders.video", + "experimental.inputs.video", + "random_resized_crop", + "experimental.decoders.image_random_crop", +] + + +def test_coverage(): + from test_dali_stateless_operators import stateless_signed_off + + tested_ops = ( + stateless_signed_off.tested_ops + | reader_signed_off.tested_ops + | random_signed_off.tested_ops + ) + + excluded_ops = unsupported_readers + unsupported_ops + + fn_ops = module_functions( + fn, remove_prefix="nvidia.dali.fn", allowed_private_modules=["_conditional"] + ) + assert len(fn_ops), "There should be some DALI ops in the `fn`, got nothing" + if excluded_ops: + exclude = "|".join( + "(^" + pattern.replace(".", r"\.").replace("*", ".*").replace("?", ".") + "$)" + for pattern in excluded_ops + ) + exclude = re.compile(exclude) + fn_ops = [x for x in fn_ops if not exclude.match(x)] + not_covered = sorted(list(set(fn_ops) - tested_ops)) + not_covered_str = ",\n".join(f"'{op_name}'" for op_name in not_covered) + # we are fine with covering more we can easily list, like numba + assert ( + set(fn_ops).difference(tested_ops) == set() + ), f"Test doesn't cover {len(not_covered)} ops:\n{not_covered_str}" diff --git a/dali/test/python/checkpointing/test_dali_stateless_operators.py b/dali/test/python/checkpointing/test_dali_stateless_operators.py index aedcc217a6e..b30e7860012 100644 --- a/dali/test/python/checkpointing/test_dali_stateless_operators.py +++ b/dali/test/python/checkpointing/test_dali_stateless_operators.py @@ -18,10 +18,10 @@ import numpy as np import itertools import nvidia.dali as dali -import nvidia.dali.fn as fn -from nvidia.dali.pipeline import pipeline_def +from nvidia.dali import fn, pipeline_def, types from test_utils import ( compare_pipelines, + create_sign_off_decorator, get_dali_extra_path, check_numba_compatibility_cpu, has_operator, @@ -29,7 +29,6 @@ ) from nose2.tools import params, cartesian_params from nose_utils import assert_raises -from test_optical_flow import is_of_supported from nose.plugins.attrib import attr # Test configuration @@ -40,6 +39,9 @@ test_sequence_shape = [test_data_frames, 426, 240, 3] # 240p video +stateless_signed_off = create_sign_off_decorator() + + def tensor_list_to_array(tensor_list): if isinstance(tensor_list, dali.backend_impl.TensorListGPU): tensor_list = tensor_list.as_cpu() @@ -101,7 +103,7 @@ def move_to(tensor, device): def check_single_input(op, device, **kwargs): - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): data = fn.external_source(source=RandomBatch(), layout=test_data_layout, batch=True) return op(move_to(data, device), device=device, **kwargs) @@ -110,7 +112,7 @@ def pipeline_factory(): def check_single_sequence_input(op, device, **kwargs): - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): data = fn.external_source( source=RandomBatch(data_shape=test_sequence_shape), layout="FHWC", batch=True @@ -121,7 +123,7 @@ def pipeline_factory(): def check_single_signal_input(op, device, **kwargs): - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): data = fn.external_source( source=RandomBatch(data_shape=[30, 40], dtype=np.float32), layout="ft", batch=True @@ -132,7 +134,7 @@ def pipeline_factory(): def check_single_1d_input(op, device, **kwargs): - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): data = fn.external_source( source=RandomBatch(data_shape=[100], dtype=np.float32), batch=True @@ -143,7 +145,7 @@ def pipeline_factory(): def check_single_encoded_jpeg_input(op, device, **kwargs): - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): img = os.path.join(get_dali_extra_path(), "db/single/jpeg/100/swan-3584559_640.jpg") jpegs, _ = fn.readers.file(files=[img], pad_last_batch=True) @@ -153,7 +155,7 @@ def pipeline_factory(): def check_single_encoded_audio_input(op, device, **kwargs): - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): wav = os.path.join(get_dali_extra_path(), "db/audio/wav/237-134500-0000.wav") audio, _ = fn.readers.file(files=[wav], pad_last_batch=True) @@ -163,7 +165,7 @@ def pipeline_factory(): def check_single_bbox_input(op, device, **kwargs): - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): data = fn.external_source(source=RandomBoundingBoxBatch(), batch=True) return op(move_to(data, device), device=device, **kwargs) @@ -172,7 +174,7 @@ def pipeline_factory(): def check_no_input(op, device, **kwargs): - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): return op(device=device, **kwargs) @@ -193,158 +195,362 @@ def test_stateful(device): @params("cpu", "gpu") +@stateless_signed_off("rotate") def test_rotate_stateless(device): check_single_input(fn.rotate, device, angle=40) @params("cpu", "gpu") +@stateless_signed_off("resize") def test_resize_stateless(device): check_single_input(fn.resize, device, resize_x=50, resize_y=50) @params("cpu", "gpu") +@stateless_signed_off("experimental.tensor_resize") +def test_tensor_resize_stateless(device): + check_single_input(fn.experimental.tensor_resize, device, axes=[0, 1], sizes=[40, 40]) + + +@params("cpu", "gpu") +@stateless_signed_off("flip") def test_flip_stateless(device): check_single_input(fn.flip, device) @params("cpu", "gpu") +@stateless_signed_off("crop") +def test_crop_stateless(device): + check_single_input(fn.crop, device, crop=(20, 20)) + + +@params("cpu", "gpu") +@stateless_signed_off("crop_mirror_normalize") def test_crop_mirror_normalize_stateless(device): - check_single_input(fn.crop_mirror_normalize, device) + check_single_input(fn.crop_mirror_normalize, device, crop=(20, 20), mirror=True) @params("cpu", "gpu") +@stateless_signed_off("warp_affine") def test_warp_affine_stateless(device): check_single_input(fn.warp_affine, device, matrix=(0.1, 0.9, 10, 0.8, -0.2, -20)) @params("cpu", "gpu") +@stateless_signed_off("color_twist") +def test_color_twist_stateless(device): + check_single_input( + fn.color_twist, + device, + brightness=1.0, + contrast=0.5, + hue=90, + saturation=1.2, + ) + + +@params("cpu", "gpu") +@stateless_signed_off("hsv") +def test_hsv_stateless(device): + check_single_input( + fn.hsv, + device, + hue=70, + value=1.8, + saturation=1.2, + ) + + +@params("cpu", "gpu") +@stateless_signed_off("hue") +def test_hue_stateless(device): + check_single_input(fn.hue, device, hue=-90) + + +@params("cpu", "gpu") +@stateless_signed_off("saturation") def test_saturation_stateless(device): check_single_input(fn.saturation, device) @params("cpu", "gpu") +@stateless_signed_off("brightness_contrast", "brightness", "contrast") +def test_brightness_contrast_stateless(device): + check_single_input(fn.brightness_contrast, device, brightness=0.7, contrast=1.7) + + +@params("cpu", "gpu") +@stateless_signed_off("reductions.min") def test_reductions_min_stateless(device): check_single_input(fn.reductions.min, device) @params("cpu", "gpu") +@stateless_signed_off("reductions.max") def test_reductions_max_stateless(device): check_single_input(fn.reductions.max, device) @params("cpu", "gpu") +@stateless_signed_off("reductions.sum") def test_reductions_sum_stateless(device): check_single_input(fn.reductions.sum, device) @params("cpu", "gpu") +@stateless_signed_off("reductions.mean") +def test_reductions_mean_stateless(device): + check_single_input(fn.reductions.mean, device) + + +@params("cpu", "gpu") +@stateless_signed_off("reductions.mean_square") +def test_reductions_mean_square_stateless(device): + check_single_input(fn.reductions.mean_square, device) + + +@params("cpu", "gpu") +@stateless_signed_off("reductions.rms") +def test_reductions_rms_stateless(device): + check_single_input(fn.reductions.rms, device) + + +@params("cpu", "gpu") +@stateless_signed_off("reductions.std_dev") +def test_reductions_std_dev_stateless(device): + check_single_input(lambda x, **kwargs: fn.reductions.std_dev(x, 0.0, **kwargs), device) + + +@params("cpu", "gpu") +@stateless_signed_off("reductions.variance") +def test_reductions_variance_stateless(device): + check_single_input(lambda x, **kwargs: fn.reductions.variance(x, 5.0, **kwargs), device) + + +@params("cpu", "gpu") +@stateless_signed_off("experimental.equalize") def test_equalize_stateless(device): check_single_input(fn.experimental.equalize, device) +@stateless_signed_off("transforms.crop") def test_transforms_crop_stateless(): check_no_input(fn.transforms.crop, "cpu") +@stateless_signed_off("transforms.rotation") def test_transforms_rotation_stateless(): check_no_input(fn.transforms.rotation, "cpu", angle=35) +@stateless_signed_off("transforms.shear") def test_transforms_shear_stateless(): check_no_input(fn.transforms.shear, "cpu", shear=(2, 2)) +@stateless_signed_off("transforms.scale") def test_transforms_scale_stateless(): check_no_input(fn.transforms.scale, "cpu", scale=(3, 2)) +@stateless_signed_off( + "transforms.translation", "hidden.transform_translation", "transform_translation" +) def test_transforms_translation_stateless(): check_no_input(fn.transforms.translation, "cpu", offset=(4, 3)) @params("cpu", "gpu") +@stateless_signed_off("coord_transform") +def test_coord_transform(device): + @pipeline_def(enable_checkpointing=True) + def pipeline_factory(): + data = fn.external_source(source=RandomBatch((5, 2)), layout="NX") + if device == "gpu": + data = data.gpu() + return fn.coord_transform(data, M=(0.1, 0.9, 10, 0.8, -0.2, -20)) + + check_is_pipeline_stateless(pipeline_factory) + + +@stateless_signed_off("transforms.combine") +def test_transforms_combine_stateless(): + @pipeline_def(enable_checkpointing=True) + def pipeline_factory(): + scale = fn.transforms.scale(scale=(3, 2)) + shear = fn.transforms.shear(shear=(2, 2)) + return fn.transforms.combine(scale, shear) + + check_is_pipeline_stateless(pipeline_factory) + + +@params("cpu", "gpu") +@stateless_signed_off("one_hot") def test_one_hot_stateless(device): check_single_input(fn.one_hot, device) +@stateless_signed_off("experimental.median_blur") def test_median_bluer_stateless(): check_single_input(fn.experimental.median_blur, "gpu") @params("cpu", "gpu") +@stateless_signed_off("erase") def test_erase_stateless(device): check_single_input(fn.erase, device, anchor=(3, 4), shape=(5, 6)) @params("cpu", "gpu") +@stateless_signed_off("pad") def test_pad_stateless(device): check_single_input(fn.pad, device, shape=(100, 100, 3)) @params("cpu", "gpu") +@stateless_signed_off("constant", "hidden.constant") def test_constant_stateless(device): check_no_input(fn.constant, device, idata=[1, 2, 3]) @params("cpu", "gpu") +@stateless_signed_off("reshape", "reinterpret") def test_reshape_stateless(device): check_single_input(fn.reshape, device, shape=[1, -1]) @params("cpu", "gpu") +@stateless_signed_off("lookup_table") def test_lookup_table_stateless(device): check_single_input(fn.lookup_table, device, keys=[0], values=[1], default_value=123) @params("cpu", "gpu") +@stateless_signed_off("transpose") def test_transpose_stateless(device): check_single_input(fn.transpose, device, perm=[2, 0, 1]) +@stateless_signed_off("paste") def test_paste_stateless(): check_single_input(fn.paste, "gpu", fill_value=0, ratio=2) @params("cpu", "gpu") +@stateless_signed_off("laplacian") +def test_laplacian_stateless(device): + check_single_input(fn.laplacian, device, window_size=3) + + +@params("cpu", "gpu") +@stateless_signed_off("gaussian_blur") +def test_gaussian_blur_stateless(device): + check_single_input(fn.gaussian_blur, device, window_size=3) + + +@params("cpu", "gpu") +@stateless_signed_off("water") +def test_water_stateless(device): + check_single_input(fn.water, device) + + +@params("cpu", "gpu") +@stateless_signed_off("sphere") +def test_sphere_stateless(device): + check_single_input(fn.sphere, device) + + +@params("cpu", "gpu") +@stateless_signed_off("experimental.filter") +def test_filter_stateless(device): + check_single_input( + lambda x, **kwargs: fn.experimental.filter(x, np.full((3, 3), 1 / 9), **kwargs), + device, + ) + + +@stateless_signed_off("experimental.remap") +def test_remap_stateless(): + rng = np.random.default_rng(42) + np_map_x = 128 * rng.uniform(size=(100, 128)) + np_map_y = 100 * rng.uniform(size=(100, 128)) + + @pipeline_def(enable_checkpointing=True) + def pipeline_factory(): + data = fn.external_source(source=RandomBatch((100, 128, 3)), layout="HWC") + data = data.gpu() + map_x = types.Constant(np_map_x).gpu() + map_y = types.Constant(np_map_y).gpu() + return fn.experimental.remap(data, map_x, map_y) + + check_is_pipeline_stateless(pipeline_factory) + + +@stateless_signed_off("experimental.debayer") +def test_debayer_stateless(): + @pipeline_def(enable_checkpointing=True) + def pipeline_factory(): + data = fn.external_source(source=RandomBatch((40, 40)), layout="HW", batch=True) + return fn.experimental.debayer(data.gpu(), blue_position=[0, 0]) + + check_is_pipeline_stateless(pipeline_factory) + + +@params("cpu", "gpu") +@stateless_signed_off("copy") +def test_copy_stateless(device): + check_single_input(fn.copy, device) + + +@params("cpu", "gpu") +@stateless_signed_off("color_space_conversion") def test_color_space_conversion_stateless(device): check_single_input( fn.color_space_conversion, device, - image_type=dali.types.DALIImageType.RGB, - output_type=dali.types.DALIImageType.YCbCr, + image_type=types.DALIImageType.RGB, + output_type=types.DALIImageType.YCbCr, ) +@params("cpu", "gpu") +@stateless_signed_off("resize_crop_mirror", "fast_resize_crop_mirror") def test_resize_crop_mirror_stateless(device): - check_single_input(fn.resize_crop_mirror, "cpu", crop=(2, 2, 3), mirror=True) + check_single_input(fn.resize_crop_mirror, device, size=(35, 55), crop=(20, 20), mirror=True) @params("cpu", "gpu") +@stateless_signed_off("slice") def test_slice_stateless(device): check_single_input(fn.slice, device, rel_start=(0.25, 0.25), rel_end=(0.75, 0.75)) @params("cpu", "gpu") +@stateless_signed_off("shapes") def test_shapes_stateless(device): check_single_input(fn.shapes, device) @params("cpu", "gpu") +@stateless_signed_off("per_frame") def test_per_frame_stateless(device): check_single_input(fn.per_frame, device, replace=True) @params("cpu", "gpu") +@stateless_signed_off("get_property") def test_get_property_stateless(device): check_single_input(fn.get_property, device, key="layout") @params("cpu", "gpu") +@stateless_signed_off("jpeg_compression_distortion") def test_jpeg_compression_distortion_stateless(device): check_single_input(fn.jpeg_compression_distortion, device) @params("cpu", "gpu") +@stateless_signed_off("multi_paste") def test_multi_paste_stateless(device): check_single_input( fn.multi_paste, device, in_ids=list(range(batch_size)), output_size=[100, 100] @@ -352,22 +558,28 @@ def test_multi_paste_stateless(device): @params("cpu", "gpu") +@stateless_signed_off("grid_mask") def test_grid_mask_stateless(device): check_single_input(fn.grid_mask, device) @params("cpu", "gpu") +@stateless_signed_off("preemphasis_filter") def test_preemphasis_filter_stateless(device): check_single_input(fn.preemphasis_filter, device) +@stateless_signed_off("optical_flow") def test_optical_flow_stateless(): + from test_optical_flow import is_of_supported + if not is_of_supported(): raise nose.SkipTest("Optical Flow is not supported on this platform") check_single_sequence_input(fn.optical_flow, "gpu") @params("cpu", "gpu") +@stateless_signed_off("sequence_rearrange") def test_sequence_rearrange_stateless(device): check_single_sequence_input( fn.sequence_rearrange, device, new_order=list(range(test_data_frames)) @@ -375,15 +587,18 @@ def test_sequence_rearrange_stateless(device): @params("cpu", "gpu") +@stateless_signed_off("spectrogram") def test_spectrogram_stateless(device): check_single_1d_input(fn.spectrogram, device) +@stateless_signed_off("power_spectrum") def test_power_spectrum_stateless(): check_single_signal_input(fn.power_spectrum, "cpu") @params("cpu", "gpu") +@stateless_signed_off("dump_image") def test_dump_image_stateless(device): suffix = "test_dump_image_stateless_tmp" check_single_input(fn.dump_image, device, suffix=suffix) @@ -392,55 +607,60 @@ def test_dump_image_stateless(device): @params("cpu", "gpu") -def test_variance_stateless(device): - check_single_1d_input(lambda x, **kwargs: fn.reductions.variance(x, 0.0, **kwargs), device) - - -@params("cpu", "gpu") +@stateless_signed_off("normalize") def test_normalize_stateless(device): check_single_input(fn.normalize, device) @params("cpu", "gpu") +@stateless_signed_off("mel_filter_bank") def test_mel_filter_bank_stateless(device): check_single_signal_input(fn.mel_filter_bank, device) @params("cpu", "gpu") +@stateless_signed_off("mfcc") def test_mfcc_stateless(device): check_single_signal_input(fn.mfcc, device) @params("cpu", "gpu") +@stateless_signed_off("nonsilent_region") def test_nonsilent_region_stateless(device): check_single_1d_input(lambda *args, **kwargs: fn.nonsilent_region(*args, **kwargs)[0], device) @params("cpu", "gpu") +@stateless_signed_off("audio_resample", "experimental.audio_resample") def test_audio_resample_stateless(device): check_single_signal_input(fn.audio_resample, device, scale=0.5) @params("cpu", "gpu") +@stateless_signed_off("element_extract") def test_element_extract_stateless(device): check_single_sequence_input(fn.element_extract, device, element_map=[0]) +@stateless_signed_off("bbox_paste") def test_bbox_paste_stateless(): check_single_bbox_input(fn.bbox_paste, "cpu", ratio=2) @params("cpu", "gpu") +@stateless_signed_off("bb_flip") def test_bb_flip_stateless(device): check_single_bbox_input(fn.bb_flip, device, ltrb=True) @params("cpu", "gpu") +@stateless_signed_off("to_decibels") def test_to_decibels_stateless(device): check_single_signal_input(fn.to_decibels, device) @cartesian_params(("cpu", "gpu"), (fn.stack, fn.cat)) +@stateless_signed_off("stack", "cat") def test_tensor_join_stateless(device, join): def wrapper(x, **kwargs): return join(x, x, x, **kwargs) @@ -449,11 +669,37 @@ def wrapper(x, **kwargs): @params("cpu", "gpu") +@stateless_signed_off("tensor_subscript", "hidden.tensor_subscript") def test_tensor_subscript_stateless(device): - check_single_input(lambda x, **kwargs: x[0, :, 2:3], device) + check_single_input(lambda x, **kwargs: x[0, :, 2:4:-1], device) @params("cpu", "gpu") +@stateless_signed_off("subscript_dim_check", "hidden.subscript_dim_check") +def test_subscript_dim_check(device): + check_single_input(lambda x, **kwargs: x[:], device) + + +@params("cpu", "gpu") +@stateless_signed_off("expand_dims") +def test_expand_dims(device): + check_single_input(fn.expand_dims, device, axes=[0]) + + +@params("cpu", "gpu") +@stateless_signed_off("squeeze") +def test_squeeze(device): + @pipeline_def(enable_checkpointing=True) + def pipeline_factory(): + data = fn.external_source(source=RandomBatch((40, 1, 50, 1)), layout="DHWC") + data = move_to(data, device) + return fn.squeeze(data, axis_names="HC") + + check_is_pipeline_stateless(pipeline_factory) + + +@params("cpu", "gpu") +@stateless_signed_off("permute_batch") def test_permute_batch_stateless(device): def wrapper(x, **kwargs): return fn.permute_batch(x, indices=[0] * batch_size, **kwargs) @@ -461,13 +707,14 @@ def wrapper(x, **kwargs): check_single_input(wrapper, device) +@stateless_signed_off("segmentation.select_masks") def test_select_masks_stateless(): n = 10 polygons = np.asarray([[i, 0, i] for i in range(n)]) vertices = np.asarray([[i, i + 1] for i in range(n)]) mask_ids = np.asarray([i for i in range(n) if i % 2 == 0]) - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): return tuple(fn.segmentation.select_masks(mask_ids, polygons, vertices)) @@ -475,13 +722,14 @@ def pipeline_factory(): @params("cpu", "gpu") +@stateless_signed_off("box_encoder") def test_box_encoder_stateless(device): n = 10 boxes = np.asarray([[float(i), float(i), float(i + 1), float(i + 1)] for i in range(n)]) labels = np.asarray(list(range(n))) anchors = [float(i) for i in range(4)] - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): return tuple(fn.box_encoder(boxes, labels, anchors=anchors, device=device)) @@ -490,6 +738,7 @@ def pipeline_factory(): @attr("cupy") @params("cpu", "gpu") +@stateless_signed_off("python_function") def test_python_function_stateless(device): def wrapper(x, **kwargs): return fn.python_function(x, function=lambda x: x * 2, **kwargs) @@ -497,7 +746,17 @@ def wrapper(x, **kwargs): check_single_input(wrapper, device) +@params("cpu", "gpu") +@stateless_signed_off("dl_tensor_python_function") +def test_dl_tensor_python_function_stateless(device): + def wrapper(x, **kwargs): + return fn.dl_tensor_python_function(x, function=lambda x: x, **kwargs) + + check_single_input(wrapper, device) + + @attr("numba") +@stateless_signed_off("experimental.numba_function") def test_numba_function_stateless(): import nvidia.dali.plugin.numba as dali_numba @@ -506,7 +765,7 @@ def test_numba_function_stateless(): def double_sample(out_sample, in_sample): out_sample[:] = 2 * in_sample[:] - @pipeline_def(batch_size=2, device_id=0, num_threads=4) + @pipeline_def(batch_size=2, device_id=0, num_threads=4, enable_checkpointing=True) def numba_pipe(): forty_two = fn.external_source( source=lambda x: np.full((2,), 42, dtype=np.uint8), batch=False @@ -514,8 +773,8 @@ def numba_pipe(): out = dali_numba.fn.experimental.numba_function( forty_two, run_fn=double_sample, - out_types=[dali.types.DALIDataType.UINT8], - in_types=[dali.types.DALIDataType.UINT8], + out_types=[types.DALIDataType.UINT8], + in_types=[types.DALIDataType.UINT8], outs_ndim=[1], ins_ndim=[1], batch_processing=False, @@ -527,6 +786,7 @@ def numba_pipe(): @has_operator("experimental.inflate") @restrict_platform(min_compute_cap=6.0, platforms=["x86_64"]) +@stateless_signed_off("experimental.inflate") def test_inflate_stateless(): import lz4.block @@ -540,7 +800,7 @@ def sample_to_lz4(sample): input_shape = [np.array(sample.shape, dtype=np.int32) for sample in batch] - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline(): deflated = fn.external_source(source=itertools.repeat(input_data)) shape = fn.external_source(source=itertools.repeat(input_shape)) @@ -549,14 +809,17 @@ def pipeline(): check_is_pipeline_stateless(pipeline) +@stateless_signed_off("peek_image_shape") def test_peek_image_shape_stateless(): check_single_encoded_jpeg_input(fn.peek_image_shape, "cpu") +@stateless_signed_off("experimental.peek_image_shape") def test_imgcodec_peek_image_shape_stateless(): check_single_encoded_jpeg_input(fn.experimental.peek_image_shape, "cpu") +@stateless_signed_off("decoders.audio", "audio_decoder") def test_audio_decoder_stateless(): def audio_decoder_wrapper(*args, **kwargs): return fn.decoders.audio(*args, **kwargs)[0] @@ -565,18 +828,47 @@ def audio_decoder_wrapper(*args, **kwargs): @params("cpu", "mixed") +@stateless_signed_off("decoders.image", "image_decoder") def test_image_decoder_stateless(device): check_single_encoded_jpeg_input(fn.decoders.image, device) @params("cpu", "mixed") +@stateless_signed_off("experimental.decoders.image") +def test_experimental_image_decoder_stateless(device): + check_single_encoded_jpeg_input(fn.experimental.decoders.image, device) + + +@params("cpu", "mixed") +@stateless_signed_off("decoders.image_crop", "image_decoder_crop") def test_image_decoder_crop_stateless(device): - check_single_encoded_jpeg_input(fn.decoders.image_crop, device) + check_single_encoded_jpeg_input(fn.decoders.image_crop, device, crop=(20, 50)) + + +@params("cpu", "mixed") +@stateless_signed_off("experimental.decoders.image_crop") +def test_experimental_image_decoder_crop_stateless(device): + check_single_encoded_jpeg_input(fn.experimental.decoders.image_crop, device, crop=(20, 50)) + + +@params("cpu", "mixed") +@stateless_signed_off("decoders.image_slice", "image_decoder_slice") +def test_image_decoder_slice_stateless(device): + check_single_encoded_jpeg_input(fn.decoders.image_slice, device, start=(5, 5), end=(45, 45)) + + +@params("cpu", "mixed") +@stateless_signed_off("experimental.decoders.image_slice") +def test_experimental_image_decoder_slice_stateless(device): + check_single_encoded_jpeg_input( + fn.experimental.decoders.image_slice, device, start=(5, 5), end=(45, 45) + ) @params("cpu", "gpu") +@stateless_signed_off("coord_flip") def test_coord_flip_stateless(device): - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): input = np.array([[1], [2], [3]], dtype=np.float32) return fn.coord_flip(input, flip_x=True, center_x=0, device=device) @@ -585,8 +877,9 @@ def pipeline_factory(): @params("cpu", "gpu") +@stateless_signed_off("cast_like") def test_cast_like_stateless(device): - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): return fn.cast_like( np.array([1, 2, 3], dtype=np.int32), np.array([1.0], dtype=np.float32), device=device @@ -595,6 +888,20 @@ def pipeline_factory(): check_is_pipeline_stateless(pipeline_factory) +@params("cpu", "gpu") +@stateless_signed_off("cast") +def test_cast_stateless(device): + @pipeline_def(enable_checkpointing=True) + def pipeline_factory(): + return fn.cast( + np.array([1, 2, 3], dtype=np.int32), + dtype=types.DALIDataType.INT16, + device=device, + ) + + check_is_pipeline_stateless(pipeline_factory) + + def arithm_ops_outputs(data): return ( data * 2, @@ -616,10 +923,57 @@ def arithm_ops_outputs(data): @params("cpu", "gpu") +@stateless_signed_off("hidden.arithmetic_generic_op", "arithmetic_generic_op") def test_arithm_ops_stateless_cpu(device): - @pipeline_def + @pipeline_def(enable_checkpointing=True) def pipeline_factory(): data = fn.external_source(source=RandomBatch(), layout="HWC") return arithm_ops_outputs(move_to(data, device)) check_is_pipeline_stateless(pipeline_factory) + + +@params("cpu", "gpu") +@stateless_signed_off( + "_conditional.hidden.not_", + "_conditional.hidden.validate_logical", + "_conditional.not_", + "_conditional.validate_logical", +) +def test_logic_ops(device): + @pipeline_def(enable_conditionals=True, enable_checkpointing=True) + def pipeline_factory(): + data = fn.external_source(source=RandomBatch(), layout="HWC") + if device == "gpu": + data = data.gpu() + condition_1 = fn.external_source(source=RandomBatch(data_shape=())) < 125 + condition_2 = fn.external_source(source=RandomBatch(data_shape=())) >= 125 + condition_3 = fn.external_source(source=RandomBatch(data_shape=())) <= 100 + if condition_1 and not condition_2 or not condition_3: + return data + else: + return data + 1 + + check_is_pipeline_stateless(pipeline_factory) + + +@params("cpu", "gpu") +@stateless_signed_off( + "_conditional.hidden.merge", + "_conditional.hidden.split", + "_conditional.merge", + "_conditional.split", +) +def test_split_and_merge(device): + @pipeline_def(enable_conditionals=True, enable_checkpointing=True) + def pipeline_factory(): + data = fn.external_source(source=RandomBatch(), layout="HWC") + if device == "gpu": + data = data.gpu() + condition = fn.external_source(source=RandomBatch(data_shape=())) <= 130 + if condition: + return data + else: + return data + types.Constant(1, dtype=types.DALIDataType.UINT8) + + check_is_pipeline_stateless(pipeline_factory) diff --git a/dali/test/python/test_utils.py b/dali/test/python/test_utils.py index 8d002a87635..4a7eb4fbccd 100644 --- a/dali/test/python/test_utils.py +++ b/dali/test/python/test_utils.py @@ -908,3 +908,24 @@ def check_numba_compatibility_gpu(if_skip=True): return False if not if_skip: return True + + +def create_sign_off_decorator(): + _tested_ops = [] + + class SignOff: + def __call__(self, *op_names): + assert all(isinstance(op_name, str) for op_name in op_names) + assert len(op_names) + _tested_ops.extend(op_names) + + def dummy(fn): + return fn + + return dummy + + @property + def tested_ops(self): + return set(_tested_ops) + + return SignOff() diff --git a/qa/TL0_python-self-test-core/test_body.sh b/qa/TL0_python-self-test-core/test_body.sh index 089496acb66..9fd2881d390 100644 --- a/qa/TL0_python-self-test-core/test_body.sh +++ b/qa/TL0_python-self-test-core/test_body.sh @@ -16,6 +16,7 @@ test_py_with_framework() { ${python_invoke_test} --attr '!slow,!pytorch,!mxnet,!cupy' ${test_script} done ${python_new_invoke_test} -A 'numba' -s type_annotations + ${python_new_invoke_test} -A '!slow,numba' -s checkpointing } test_py() { diff --git a/qa/TL0_python_self_test_frameworks/test_cupy.sh b/qa/TL0_python_self_test_frameworks/test_cupy.sh index 4d8c3313e06..84fb99610c0 100755 --- a/qa/TL0_python_self_test_frameworks/test_cupy.sh +++ b/qa/TL0_python_self_test_frameworks/test_cupy.sh @@ -10,6 +10,7 @@ test_body() { ${python_invoke_test} test_external_source_cupy.py ${python_invoke_test} --attr 'cupy' test_external_source_impl_utils.py ${python_invoke_test} --attr 'cupy' test_pipeline_debug.py + ${python_new_invoke_test} -A '!slow,cupy' -s checkpointing } pushd ../..