From 79b019b102c5d68843d52473f7d26a80597d84d2 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin
Date: Thu, 23 Jun 2022 11:43:07 -0700
Subject: [PATCH] api: depthwise post-op with any kernel, stride and padding values

---
 doc/programming_model/attributes_post_ops.md  |  10 +-
 include/oneapi/dnnl/dnnl.h                    |  64 ++++
 include/oneapi/dnnl/dnnl.hpp                  | 136 ++++++---
 .../src/benchdnn_generator.py                 |   3 +-
 scripts/verbose_converter/src/dnnl_parser.py  |  12 +-
 src/common/primitive_attr.cpp                 |  67 ++--
 src/common/primitive_attr.hpp                 |  19 +-
 src/common/primitive_hashing.cpp              |   4 +
 src/common/serialization.cpp                  |   2 +
 src/common/verbose.cpp                        |   3 +-
 src/cpu/dw_convolution_utils.hpp              |  21 +-
 .../x64/jit_avx512_common_1x1_convolution.cpp |   6 +-
 src/cpu/x64/jit_sse41_1x1_convolution.cpp     |   5 +-
 tests/benchdnn/conv/conv_dw_fusion.cpp        |  25 +-
 tests/benchdnn/dnn_types.cpp                  |  50 +--
 tests/benchdnn/dnn_types.hpp                  |   9 +-
 tests/benchdnn/doc/knobs_attr.md              |   7 +-
 .../inputs/conv/harness_conv_fused_depthwise  | 288 ++++++++++++++++++
 tests/benchdnn/utils/parser.cpp               |   2 +-
 tests/gtests/test_iface_attr.cpp              |  20 ++
 20 files changed, 629 insertions(+), 124 deletions(-)

diff --git a/doc/programming_model/attributes_post_ops.md b/doc/programming_model/attributes_post_ops.md
index 94f24b264ff..78cc63638df 100644
--- a/doc/programming_model/attributes_post_ops.md
+++ b/doc/programming_model/attributes_post_ops.md
@@ -164,12 +164,14 @@ convolution.
 
 The @ref dnnl::primitive::kind of this post-op is
 #dnnl::primitive::kind::convolution.
 
-There are two variants of this post-op: `dw_k3s1p1` and `dw_k3s2p1` for stride-1
-and stride-2 respectively.
+Three variants of depthwise post-op are supported:
+* `dw_k3s1p1` for the case of stride 1, kernel size 3, and left padding of 1.
+* `dw_k3s2p1` for the case of stride 2, kernel size 3, and left padding of 1.
+* `dw` for the general case.
 
 API:
-- C: @ref dnnl_post_ops_append_dw_k3s1p1 , @ref dnnl_post_ops_append_dw_k3s2p1
-- C++: @ref dnnl::post_ops::append_dw_k3s1p1 , @ref dnnl::post_ops::append_dw_k3s2p1
+- C: @ref dnnl_post_ops_append_dw , @ref dnnl_post_ops_append_dw_k3s1p1 , @ref dnnl_post_ops_append_dw_k3s2p1
+- C++: @ref dnnl::post_ops::append_dw , @ref dnnl::post_ops::append_dw_k3s1p1 , @ref dnnl::post_ops::append_dw_k3s2p1
 
 For better readability, below we assume a 2D convolution and use the following
 notations:
diff --git a/include/oneapi/dnnl/dnnl.h b/include/oneapi/dnnl/dnnl.h
index 2673c624535..f71084445aa 100644
--- a/include/oneapi/dnnl/dnnl.h
+++ b/include/oneapi/dnnl/dnnl.h
@@ -792,6 +792,70 @@ dnnl_status_t DNNL_API dnnl_post_ops_get_params_eltwise(
         const_dnnl_post_ops_t post_ops, int index, float *scale,
         dnnl_alg_kind_t *alg_kind, float *alpha, float *beta);
 
+/// Appends a depthwise post-op convolution.
+///
+/// This post-op can only be fused with a 2D 1x1 convolution (convolution with
+/// weights spatial dimensions equal to 1 i.e., kh=kw=1).
+///
+/// The kind of this post-op is #dnnl_convolution.
+///
+/// The number of outputs for the primitive with fusion is one. The output
+/// spatial size can be derived as below:
+///
+///     output_height = ceil(output_height_1x1_convolution, stride)
+///     output_width = ceil(output_width_1x1_convolution, stride)
+///
+/// See @ref dev_guide_attributes_post_ops_depthwise and
+/// @ref dev_guide_attributes_post_ops_depthwise_fusion for more info.
+///
+/// @param post_ops Post-ops.
+/// @param weights_data_type Weights data type of depthwise post-op
+/// @param bias_data_type Bias data type of depthwise post-op
+/// @param dst_data_type Output data type of depthwise post-op
+/// @param kernel_size Size of kernel of depthwise post-op
+/// @param stride_size Size of stride of depthwise post-op
+/// @param padding_l_size Size of left and top paddings of depthwise post-op
+/// @param count Length of the array of scaling factors @p scales.
+/// @param mask Scaling factors correspondence mask that defines the
+///     correspondence between the output tensor dimensions and the @p
+///     scales array. The set i-th bit indicates that a dedicated output scaling
+///     factor is used for each index along that dimension. The mask value of 0
+///     implies a common scaling factor for the whole output tensor.
+/// @param scales Pointer to a constant array of float scaling factors.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise
+dnnl_status_t DNNL_API dnnl_post_ops_append_dw(dnnl_post_ops_t post_ops,
+        dnnl_data_type_t weights_data_type, dnnl_data_type_t bias_data_type,
+        dnnl_data_type_t dst_data_type, dnnl_dim_t kernel_size,
+        dnnl_dim_t stride_size, dnnl_dim_t padding_l_size, dnnl_dim_t count,
+        int mask, const float *scales);
+
+/// Returns the parameters of a depthwise post-op.
+///
+/// @param post_ops Post-ops.
+/// @param index Index of the depthwise post-op.
+/// @param weights_data_type Weights data type of depthwise post-op
+/// @param bias_data_type Bias data type of depthwise post-op
+/// @param dst_data_type Output data type of depthwise post-op
+/// @param kernel_size Size of kernel of depthwise post-op
+/// @param stride_size Size of stride of depthwise post-op
+/// @param padding_l_size Size of left and top paddings of depthwise post-op
+/// @param count Output length of the array of scaling factors @p scales.
+/// @param mask Output scaling factors correspondence mask that defines the
+///     correspondence between the output tensor dimensions and the @p
+///     scales array. The set i-th bit indicates that a dedicated output scaling
+///     factor is used for each index along that dimension. The mask value of 0
+///     implies a common scaling factor for the whole output tensor.
+/// @param scales Output pointer to a constant array of float scaling factors.
+/// @returns #dnnl_success on success and a status describing the error
+///     otherwise
+dnnl_status_t DNNL_API dnnl_post_ops_get_params_dw(
+        const_dnnl_post_ops_t post_ops, int index,
+        dnnl_data_type_t *weights_data_type, dnnl_data_type_t *bias_data_type,
+        dnnl_data_type_t *dst_data_type, dnnl_dim_t *kernel_size,
+        dnnl_dim_t *stride_size, dnnl_dim_t *padding_l_size, dnnl_dim_t *count,
+        int *mask, const float **scales);
+
 /// Appends a depthwise post-op convolution with stride 1.
 ///
 /// This post-op can only be fused with a 2D 1x1 convolution (convolution with
diff --git a/include/oneapi/dnnl/dnnl.hpp b/include/oneapi/dnnl/dnnl.hpp
index 0a6fa01b2d8..2fecd007dc9 100644
--- a/include/oneapi/dnnl/dnnl.hpp
+++ b/include/oneapi/dnnl/dnnl.hpp
@@ -3081,7 +3081,7 @@ struct post_ops : public handle {
         aalgorithm = static_cast<algorithm>(c_alg);
     }
 
-    /// Appends a depthwise post-op convolution with stride 1.
+    /// Appends a depthwise post-op convolution.
     ///
     /// This post-op can only be fused with a 2D 1x1 convolution (convolution
     /// with weights spatial dimension equal to 1 i.e., kh=kw=1).
@@ -3089,11 +3089,10 @@ struct post_ops : public handle { /// The kind of this post-op is #dnnl_convolution. /// /// The number of outputs for primitive remain same as before. The output - /// size remain same as the original primitive due to stride=1. - /// - /// The Post-op can be defined as: + /// spatial size can be derived as below: /// - /// dst[:] <- scales * (conv_dw(conv_1x1)) + /// output_height = ceil(output_height_1x1_convolution, stride) + /// output_width = ceil(output_width_1x1_convolution, stride) /// /// See @ref dev_guide_attributes_post_ops_depthwise and /// @ref dev_guide_attributes_post_ops_depthwise_fusion for more info. @@ -3101,6 +3100,9 @@ struct post_ops : public handle { /// @param weights_data_type Weights data type of depthwise post-op /// @param bias_data_type Bias data type of depthwise post-op /// @param dst_data_type Output data type of depthwise post-op + /// @param kernel_size Size of kernel of depthwise post-op + /// @param stride_size Size of stride of depthwise post-op + /// @param padding_l_size Size of left and top paddings of depthwise post-op /// @param mask Output scaling factors correspondence mask that defines the /// correspondence between the output tensor dimensions and the /// @p scales array. The set i-th bit indicates that a dedicated output @@ -3109,14 +3111,17 @@ struct post_ops : public handle { /// tensor. /// @param scales Output pointer to a constant array of float scaling /// factors. - void append_dw_k3s1p1(memory::data_type weights_data_type, + void append_dw(memory::data_type weights_data_type, memory::data_type bias_data_type, memory::data_type dst_data_type, - int mask, const std::vector &scales) { + memory::dim kernel_size, memory::dim stride_size, + memory::dim padding_l_size, int mask, + const std::vector &scales) { - error::wrap_c_api(dnnl_post_ops_append_dw_k3s1p1(get(), + error::wrap_c_api(dnnl_post_ops_append_dw(get(), memory::convert_to_c(weights_data_type), memory::convert_to_c(bias_data_type), memory::convert_to_c(dst_data_type), + kernel_size, stride_size, padding_l_size, scales.size(), mask, scales.data()), "could not append depthwise post-op"); } @@ -3135,24 +3140,34 @@ struct post_ops : public handle { /// tensor. /// @param scales Output pointer to a constant array of float scaling /// factors. 
-    void get_params_dw_k3s1p1(int index, memory::data_type &weights_data_type,
+    void get_params_dw(int index, memory::data_type &weights_data_type,
             memory::data_type &bias_data_type, memory::data_type &dst_data_type,
-            int &mask, std::vector<float> &scales) const {
+            memory::dim &kernel_size, memory::dim &stride_size,
+            memory::dim &padding_l_size, int &mask,
+            std::vector<float> &scales) const {
 
         dnnl_data_type_t c_weights_data_type;
         dnnl_data_type_t c_bias_data_type;
         dnnl_data_type_t c_dst_data_type;
+        dnnl_dim_t c_kernel_size;
+        dnnl_dim_t c_stride_size;
+        dnnl_dim_t c_padding_l_size;
         dnnl_dim_t count;
         int c_mask;
         const float *c_scales;
-        error::wrap_c_api(dnnl_post_ops_get_params_dw_k3s1p1(get(), index,
-                                  &c_weights_data_type, &c_bias_data_type,
-                                  &c_dst_data_type, &count, &c_mask, &c_scales),
+        error::wrap_c_api(
+                dnnl_post_ops_get_params_dw(get(), index, &c_weights_data_type,
+                        &c_bias_data_type, &c_dst_data_type, &c_kernel_size,
+                        &c_stride_size, &c_padding_l_size, &count, &c_mask,
+                        &c_scales),
                 "could not get parameters of depthwise post-op");
 
         weights_data_type = static_cast<memory::data_type>(c_weights_data_type);
         bias_data_type = static_cast<memory::data_type>(c_bias_data_type);
         dst_data_type = static_cast<memory::data_type>(c_dst_data_type);
+        kernel_size = c_kernel_size;
+        stride_size = c_stride_size;
+        padding_l_size = c_padding_l_size;
         scales.resize(count);
 
         mask = c_mask;
@@ -3161,6 +3176,67 @@ struct post_ops : public handle {
         return;
     }
 
+    /// Appends a depthwise post-op convolution with stride 1.
+    ///
+    /// This post-op can only be fused with a 2D 1x1 convolution (convolution
+    /// with weights spatial dimension equal to 1 i.e., kh=kw=1).
+    ///
+    /// The kind of this post-op is #dnnl_convolution.
+    ///
+    /// The number of outputs for the primitive remains the same as before. The
+    /// output size remains the same as the original primitive due to stride=1.
+    ///
+    /// The post-op can be defined as:
+    ///
+    ///      dst[:] <- scales * (conv_dw(conv_1x1))
+    ///
+    /// See @ref dev_guide_attributes_post_ops_depthwise and
+    /// @ref dev_guide_attributes_post_ops_depthwise_fusion for more info.
+    ///
+    /// @param weights_data_type Weights data type of depthwise post-op
+    /// @param bias_data_type Bias data type of depthwise post-op
+    /// @param dst_data_type Output data type of depthwise post-op
+    /// @param mask Output scaling factors correspondence mask that defines the
+    ///     correspondence between the output tensor dimensions and the
+    ///     @p scales array. The set i-th bit indicates that a dedicated output
+    ///     scaling factor is used for each index along that dimension. The mask
+    ///     value of 0 implies a common scaling factor for the whole output
+    ///     tensor.
+    /// @param scales Output pointer to a constant array of float scaling
+    ///     factors.
+    void append_dw_k3s1p1(memory::data_type weights_data_type,
+            memory::data_type bias_data_type, memory::data_type dst_data_type,
+            int mask, const std::vector<float> &scales) {
+
+        append_dw(weights_data_type, bias_data_type, dst_data_type, 3, 1, 1,
+                mask, scales);
+    }
+
+    /// Returns the parameters of a depthwise post-op with stride 1.
+    ///
+    /// @param index Index of the depthwise post-op.
+    /// @param weights_data_type Weights data type of depthwise post-op
+    /// @param bias_data_type Bias data type of depthwise post-op
+    /// @param dst_data_type Output data type of depthwise post-op
+    /// @param mask Output scaling factors correspondence mask that defines the
+    ///     correspondence between the output tensor dimensions and the
+    ///     @p scales array. The set i-th bit indicates that a dedicated output
+    ///     scaling factor is used for each index along that dimension. The mask
+    ///     value of 0 implies a common scaling factor for the whole output
+    ///     tensor.
+    /// @param scales Output pointer to a constant array of float scaling
+    ///     factors.
+    void get_params_dw_k3s1p1(int index, memory::data_type &weights_data_type,
+            memory::data_type &bias_data_type, memory::data_type &dst_data_type,
+            int &mask, std::vector<float> &scales) const {
+
+        memory::dim kernel_size;
+        memory::dim stride_size;
+        memory::dim padding_l_size;
+        get_params_dw(index, weights_data_type, bias_data_type, dst_data_type,
+                kernel_size, stride_size, padding_l_size, mask, scales);
+    }
+
     /// Appends a depthwise post-op convolution with stride 2.
     ///
     /// This post-op can only be fused with a 2D 1x1 convolution (convolution
@@ -3197,13 +3273,8 @@
     void append_dw_k3s2p1(memory::data_type weights_data_type,
             memory::data_type bias_data_type, memory::data_type dst_data_type,
             int mask, const std::vector<float> &scales) {
-
-        error::wrap_c_api(dnnl_post_ops_append_dw_k3s2p1(get(),
-                memory::convert_to_c(weights_data_type),
-                memory::convert_to_c(bias_data_type),
-                memory::convert_to_c(dst_data_type),
-                scales.size(), mask, scales.data()),
-                "could not append depthwise post-op");
+        append_dw(weights_data_type, bias_data_type, dst_data_type, 3, 2, 1,
+                mask, scales);
     }
 
     /// Returns the parameters of an depthwise post-op with stride 2.
@@ -3224,26 +3295,11 @@
             memory::data_type &bias_data_type, memory::data_type &dst_data_type,
             int &mask, std::vector<float> &scales) const {
 
-        dnnl_data_type_t c_weights_data_type;
-        dnnl_data_type_t c_bias_data_type;
-        dnnl_data_type_t c_dst_data_type;
-        dnnl_dim_t count;
-        int c_mask;
-        const float *c_scales;
-        error::wrap_c_api(dnnl_post_ops_get_params_dw_k3s2p1(get(), index,
-                                  &c_weights_data_type, &c_bias_data_type,
-                                  &c_dst_data_type, &count, &c_mask, &c_scales),
-                "could not get parameters of depthwise post-op");
-
-        weights_data_type = static_cast<memory::data_type>(c_weights_data_type);
-        bias_data_type = static_cast<memory::data_type>(c_bias_data_type);
-        dst_data_type = static_cast<memory::data_type>(c_dst_data_type);
-        scales.resize(count);
-
-        mask = c_mask;
-        for (dnnl_dim_t c = 0; c < count; ++c)
-            scales[c] = c_scales[c];
-        return;
+        memory::dim kernel_size;
+        memory::dim stride_size;
+        memory::dim padding_l_size;
+        get_params_dw(index, weights_data_type, bias_data_type, dst_data_type,
+                kernel_size, stride_size, padding_l_size, mask, scales);
     }
 
     /// Appends a binary post-op.
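For reference, here is a minimal sketch of how the generalized C++ API above is intended to be used from user code. The data types, geometry, and the empty `scales` vector are illustrative assumptions, not part of this patch:

```cpp
#include "oneapi/dnnl/dnnl.hpp"

#include <vector>

int main() {
    dnnl::post_ops ops;
    std::vector<float> scales; // empty: no output scaling for the post-op

    // Fuse a 5x5 depthwise convolution with stride 2 and left/top padding 1
    // after a 1x1 convolution; previously only k3s1p1/k3s2p1 were expressible.
    ops.append_dw(dnnl::memory::data_type::s8, // weights
            dnnl::memory::data_type::f32, // bias
            dnnl::memory::data_type::u8, // destination
            /*kernel_size=*/5, /*stride_size=*/2, /*padding_l_size=*/1,
            /*mask=*/0, scales);

    // The parameters can be queried back through the generalized getter.
    dnnl::memory::data_type wei_dt, bias_dt, dst_dt;
    dnnl::memory::dim kernel, stride, padding;
    int mask;
    ops.get_params_dw(
            0, wei_dt, bias_dt, dst_dt, kernel, stride, padding, mask, scales);

    dnnl::primitive_attr attr;
    attr.set_post_ops(ops); // pass via attributes of the fused 1x1 convolution
    return 0;
}
```

The `append_dw_k3s1p1` and `append_dw_k3s2p1` shortcuts above now reduce to `append_dw(..., 3, 1, 1, ...)` and `append_dw(..., 3, 2, 1, ...)` respectively.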
diff --git a/scripts/verbose_converter/src/benchdnn_generator.py b/scripts/verbose_converter/src/benchdnn_generator.py
index c3551f0f9d0..1c5b6255bd2 100644
--- a/scripts/verbose_converter/src/benchdnn_generator.py
+++ b/scripts/verbose_converter/src/benchdnn_generator.py
@@ -449,7 +449,8 @@ def convert_binary_post_op(post_op):
 
     def convert_dw_post_op(post_op):
         policy = convert_scale_policy(post_op['scales']['mask'])
-        po = post_op['alg'] + ':' + post_op['dst_dt'] + ':' + policy
+        po = post_op['alg'] + ':' + post_op['ksp'] + ':' + post_op[
+            'dst_dt'] + ':' + policy
         if post_op['scales']['value'] != None:
             po += ':' + post_op['scales']['value']
         return po
diff --git a/scripts/verbose_converter/src/dnnl_parser.py b/scripts/verbose_converter/src/dnnl_parser.py
index 226bb1a2183..5d294d37f27 100644
--- a/scripts/verbose_converter/src/dnnl_parser.py
+++ b/scripts/verbose_converter/src/dnnl_parser.py
@@ -131,6 +131,7 @@ def convert_binary_post_op(value):
     def convert_dw_post_op(value):
         p_op = {
             'alg': '',
+            'ksp': '',
             'dst_dt': 'f32',
             'wei_dt': 'f32',
             'scales': {
@@ -141,13 +142,14 @@ def convert_dw_post_op(value):
         params = value.split(':')
         len_params = len(params)
         p_op['alg'] = params[0]
-        if len_params > 1:
-            p_op['dst_dt'] = params[1]
+        p_op['ksp'] = params[1]
         if len_params > 2:
-            p_op['wei_dt'] = 's8'
-            p_op['scales']['mask'] = params[2]
+            p_op['dst_dt'] = params[2]
         if len_params > 3:
-            p_op['scales']['value'] = params[3]
+            p_op['wei_dt'] = 's8'
+            p_op['scales']['mask'] = params[3]
+        if len_params > 4:
+            p_op['scales']['value'] = params[4]
         return p_op
 
     def convert_eltwise_post_op(value):
diff --git a/src/common/primitive_attr.cpp b/src/common/primitive_attr.cpp
index b43dd5af397..d5c3b47a30e 100644
--- a/src/common/primitive_attr.cpp
+++ b/src/common/primitive_attr.cpp
@@ -221,18 +221,28 @@ dnnl::impl::status_t post_ops_t::entry_t::set_depthwise_scales(
     return dnnl::impl::status::success;
 }
 
-status_t post_ops_t::append_dw_k3s1p1(data_type_t wei_dt, data_type_t bias_dt,
-        data_type_t dst_dt, dim_t count, int mask, const float *scales) {
+status_t post_ops_t::append_dw(data_type_t wei_dt, data_type_t bias_dt,
+        data_type_t dst_dt, dim_t kernel_size, dim_t stride_size,
+        dim_t padding_l_size, dim_t count, int mask, const float *scales) {
     if (len() == post_ops_limit) return out_of_memory;
     bool ok = wei_dt != data_type::undef && dst_dt != data_type::undef
             && IMPLICATION(count > 0, scales) && mask >= 0;
     if (!ok) return invalid_arguments;
 
+    ok = ok && kernel_size > 0 && stride_size > 0;
+    if (!ok) return invalid_arguments;
+
+    // Avoid cases when the kernel lies entirely in the padding area
+    ok = ok && (padding_l_size + 1) <= kernel_size;
+    if (!ok) return invalid_arguments;
+
     entry_.emplace_back();
     auto &e = entry_.back();
     e.kind = primitive_kind::convolution;
     auto &d = e.depthwise_conv;
-    d.stride = 1;
+    d.kernel = kernel_size;
+    d.stride = stride_size;
+    d.padding = padding_l_size;
     d.wei_dt = wei_dt;
     d.bias_dt = bias_dt;
     d.dst_dt = dst_dt;
@@ -243,17 +253,6 @@ status_t post_ops_t::append_dw_k3s1p1(data_type_t wei_dt, data_type_t bias_dt,
     return e.set_depthwise_scales(scales);
 }
 
-status_t post_ops_t::append_dw_k3s2p1(data_type_t wei_dt, data_type_t bias_dt,
-        data_type_t dst_dt, dim_t count, int mask, const float *scales) {
-
-    auto status
-            = append_dw_k3s1p1(wei_dt, bias_dt, dst_dt, count, mask, scales);
-    if (status != success) return status;
-    entry_.back().depthwise_conv.stride = 2;
-
-    return success;
-}
-
 status_t post_ops_t::append_binary(
         alg_kind_t alg, const memory_desc_t *user_src1_desc) {
     if (len() ==
post_ops_limit) return out_of_memory; @@ -610,13 +609,45 @@ status_t dnnl_post_ops_get_params_eltwise(const post_ops_t *post_ops, int index, return success; } +status_t dnnl_post_ops_append_dw(post_ops_t *post_ops, data_type_t wei_dt, + data_type_t bias_dt, data_type_t dst_dt, dim_t kernel_size, + dim_t stride_size, dim_t padding_l_size, dim_t count, int mask, + const float *scales) { + if (post_ops == nullptr) return invalid_arguments; + + return post_ops->append_dw(wei_dt, bias_dt, dst_dt, kernel_size, + stride_size, padding_l_size, count, mask, scales); +} + +status_t dnnl_post_ops_get_params_dw(const post_ops_t *post_ops, int index, + data_type_t *wei_dt, data_type_t *bias_dt, data_type_t *dst_dt, + dim_t *kernel, dim_t *stride, dim_t *padding, dim_t *count, int *mask, + const float **scales) { + + if (!simple_get_params_check(post_ops, index, primitive_kind::convolution)) + return invalid_arguments; + + const auto &d = post_ops->entry_[index].depthwise_conv; + if (wei_dt) *wei_dt = d.wei_dt; + if (bias_dt) *bias_dt = d.bias_dt; + if (dst_dt) *dst_dt = d.dst_dt; + if (kernel) *kernel = d.kernel; + if (stride) *stride = d.stride; + if (padding) *padding = d.padding; + if (count) *count = d.count; + if (mask) *mask = d.mask; + if (scales) *scales = d.scales; + + return success; +} + status_t dnnl_post_ops_append_dw_k3s1p1(post_ops_t *post_ops, data_type_t wei_dt, data_type_t bias_dt, data_type_t dst_dt, dim_t count, int mask, const float *scales) { if (post_ops == nullptr) return invalid_arguments; - return post_ops->append_dw_k3s1p1( - wei_dt, bias_dt, dst_dt, count, mask, scales); + return post_ops->append_dw( + wei_dt, bias_dt, dst_dt, 3, 1, 1, count, mask, scales); } status_t dnnl_post_ops_get_params_dw_k3s1p1(const post_ops_t *post_ops, @@ -643,8 +674,8 @@ status_t dnnl_post_ops_append_dw_k3s2p1(post_ops_t *post_ops, dim_t count, int mask, const float *scales) { if (post_ops == nullptr) return invalid_arguments; - return post_ops->append_dw_k3s2p1( - wei_dt, bias_dt, dst_dt, count, mask, scales); + return post_ops->append_dw( + wei_dt, bias_dt, dst_dt, 3, 2, 1, count, mask, scales); } status_t dnnl_post_ops_get_params_dw_k3s2p1(const post_ops_t *post_ops, diff --git a/src/common/primitive_attr.hpp b/src/common/primitive_attr.hpp index 86b4b268ceb..cefab214f4b 100644 --- a/src/common/primitive_attr.hpp +++ b/src/common/primitive_attr.hpp @@ -355,7 +355,9 @@ struct dnnl_post_ops : public dnnl::impl::c_compatible { }; struct depthwise_conv_t { - int stride; + dnnl::impl::dim_t kernel; + dnnl::impl::dim_t stride; + dnnl::impl::dim_t padding; dnnl::impl::data_type_t wei_dt; dnnl::impl::data_type_t bias_dt; dnnl::impl::data_type_t dst_dt; @@ -449,7 +451,11 @@ struct dnnl_post_ops : public dnnl::impl::c_compatible { break; case primitive_kind::convolution: // Depthwise Only - ret = depthwise_conv.stride == rhs.depthwise_conv.stride + ret = depthwise_conv.kernel == rhs.depthwise_conv.kernel + && depthwise_conv.stride + == rhs.depthwise_conv.stride + && depthwise_conv.padding + == rhs.depthwise_conv.padding && depthwise_conv.wei_dt == rhs.depthwise_conv.wei_dt && depthwise_conv.bias_dt @@ -515,12 +521,11 @@ struct dnnl_post_ops : public dnnl::impl::c_compatible { dnnl::impl::data_type_t dt = dnnl_data_type_undef); dnnl::impl::status_t append_eltwise( float scale, dnnl::impl::alg_kind_t alg, float alpha, float beta); - dnnl::impl::status_t append_dw_k3s1p1(dnnl::impl::data_type_t wei_dt, + dnnl::impl::status_t append_dw(dnnl::impl::data_type_t wei_dt, dnnl::impl::data_type_t bias_dt, 
dnnl::impl::data_type_t dst_dt, - dnnl::impl::dim_t count, int mask, const float *scales); - dnnl::impl::status_t append_dw_k3s2p1(dnnl::impl::data_type_t wei_dt, - dnnl::impl::data_type_t bias_dt, dnnl::impl::data_type_t dst_dt, - dnnl::impl::dim_t count, int mask, const float *scales); + dnnl::impl::dim_t kernel_size, dnnl::impl::dim_t stride_size, + dnnl::impl::dim_t padding_l_size, dnnl::impl::dim_t count, int mask, + const float *scales); dnnl::impl::status_t append_binary(dnnl::impl::alg_kind_t alg, const dnnl::impl::memory_desc_t *user_src1_desc); dnnl::impl::status_t append_prelu(int mask); diff --git a/src/common/primitive_hashing.cpp b/src/common/primitive_hashing.cpp index 874b9078165..c34b1adddc7 100644 --- a/src/common/primitive_hashing.cpp +++ b/src/common/primitive_hashing.cpp @@ -255,8 +255,12 @@ size_t get_attr_hash(const primitive_attr_t &attr) { seed = hash_combine(seed, static_cast(entry.sum.dt)); break; case primitive_kind::convolution: + seed = hash_combine( + seed, static_cast(entry.depthwise_conv.kernel)); seed = hash_combine( seed, static_cast(entry.depthwise_conv.stride)); + seed = hash_combine(seed, + static_cast(entry.depthwise_conv.padding)); seed = hash_combine( seed, static_cast(entry.depthwise_conv.wei_dt)); seed = hash_combine(seed, diff --git a/src/common/serialization.cpp b/src/common/serialization.cpp index 3ebc836062f..0a82255685e 100644 --- a/src/common/serialization.cpp +++ b/src/common/serialization.cpp @@ -181,7 +181,9 @@ void serialize_attr( sstream.write(&entry.sum.dt); break; case primitive_kind::convolution: + sstream.write(&entry.depthwise_conv.kernel); sstream.write(&entry.depthwise_conv.stride); + sstream.write(&entry.depthwise_conv.padding); sstream.write(&entry.depthwise_conv.wei_dt); sstream.write(&entry.depthwise_conv.bias_dt); sstream.write(&entry.depthwise_conv.dst_dt); diff --git a/src/common/verbose.cpp b/src/common/verbose.cpp index 3beb44b73b5..5cc041f6fb8 100644 --- a/src/common/verbose.cpp +++ b/src/common/verbose.cpp @@ -435,7 +435,8 @@ std::ostream &operator<<(std::ostream &ss, const primitive_attr_t *attr) { case primitive_kind::convolution: { using namespace data_type; const auto &c = e.depthwise_conv; - ss << delim << "dw_k3s" << c.stride << "p1"; + ss << delim << "dw:k" << c.kernel << "s" << c.stride << "p" + << c.padding; if (c.wei_dt == s8 || c.dst_dt != f32) ss << ":" << c.dst_dt; if (c.count > 0 && c.wei_dt == s8) { diff --git a/src/cpu/dw_convolution_utils.hpp b/src/cpu/dw_convolution_utils.hpp index 23d581eee59..bfa2cd2423d 100644 --- a/src/cpu/dw_convolution_utils.hpp +++ b/src/cpu/dw_convolution_utils.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2021 Intel Corporation +* Copyright 2020-2022 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -63,17 +63,26 @@ inline status_t get_depthwise_conv_desc(convolution_desc_t &cd_dw,
     const auto g = src_dw_d.dims()[1];
     const auto ih = src_dw_d.dims()[ndims - 2];
     const auto iw = src_dw_d.dims()[ndims - 1];
+    const auto kernel = dw_po.kernel;
     const auto stride = dw_po.stride;
+    const auto padding = dw_po.padding;
 
-    const dims_t weights_tz = {g, 1, 1, 3, 3};
+    const dims_t weights_tz = {g, 1, 1, kernel, kernel};
 
-    const dims_t dst_tz
-            = {n, oc, utils::div_up(ih, stride), utils::div_up(iw, stride)};
+    // Not following the standard convolution formula for output shapes since
+    // the right/bottom padding might be greater than the left/top one.
+    const dim_t oh = utils::div_up(ih, stride);
+    const dim_t ow = utils::div_up(iw, stride);
+    const dims_t dst_tz = {n, oc, oh, ow};
 
     const dims_t bias_tz = {oc};
-    const dims_t pad_tz = {1, 1};
+    const dims_t pad_tz = {padding, padding};
     const dims_t stride_tz = {stride, stride};
 
+    const dim_t pad_h_r = (oh - 1) * stride - ih + kernel - padding;
+    const dim_t pad_w_r = (ow - 1) * stride - iw + kernel - padding;
+    const dims_t pad_r_tz = {pad_h_r, pad_w_r};
+
     memory_desc_t src_md, weights_md, bias_md, dst_md;
 
     const auto src_dw_tag = src_dw_d.matches_one_of_tag(
@@ -97,7 +106,7 @@ inline status_t get_depthwise_conv_desc(convolution_desc_t &cd_dw,
     CHECK(conv_desc_init(&cd_dw, prop_kind::forward_inference,
             alg_kind::convolution_auto, &src_md, &weights_md,
             with_bias ? &bias_md : nullptr, &dst_md, stride_tz, nullptr, pad_tz,
-            pad_tz));
+            pad_r_tz));
 
     return status::success;
 }
diff --git a/src/cpu/x64/jit_avx512_common_1x1_convolution.cpp b/src/cpu/x64/jit_avx512_common_1x1_convolution.cpp
index 291e0c8587d..57f50b514e7 100644
--- a/src/cpu/x64/jit_avx512_common_1x1_convolution.cpp
+++ b/src/cpu/x64/jit_avx512_common_1x1_convolution.cpp
@@ -134,7 +134,6 @@ void jit_avx512_common_1x1_convolution_fwd_t addrs;
     // End
@@ -191,8 +190,9 @@ void jit_avx512_common_1x1_convolution_fwd_tdw_conv_pd_->jcp_.kh) * row_offset
+                : &dst[dst_off];
 
         p.bias_data = bias
                 ? &bias[oc_off_idx * (is_dst_layout_nxc ? 1 : jcp.oc_block)]
                 : nullptr;
diff --git a/src/cpu/x64/jit_sse41_1x1_convolution.cpp b/src/cpu/x64/jit_sse41_1x1_convolution.cpp
index cfa93ed7a91..49d55dfb22a 100644
--- a/src/cpu/x64/jit_sse41_1x1_convolution.cpp
+++ b/src/cpu/x64/jit_sse41_1x1_convolution.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2021 Intel Corporation
+* Copyright 2017-2022 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -103,7 +103,6 @@ void jit_sse41_1x1_convolution_fwd_t::execute_forward_thr(const int ithr,
     data_t *pbuf {nullptr};
     size_t row_offset {};
     const int nb_buffer = jcp.nb_load_blocking;
-    const int jcp_dw_kh = 3;
     std::vector<data_t *> addrs;
 
     auto step = [](int default_step, int remaining, int tail_step) {
@@ -147,7 +146,7 @@ void jit_sse41_1x1_convolution_fwd_t::execute_forward_thr(const int ithr,
         const int oc_off_idx = (is_dst_layout_nxc ? jcp.oc_block : 1) * _ocb;
 
         par_conv.output_data = jcp.with_dw_conv
-                ? pbuf + (oh % jcp_dw_kh) * row_offset
+                ? pbuf + (oh % pd()->dw_conv_pd_->jcp_.kh) * row_offset
                 : &dst[data_blk_off(dst_d, n, oc_off_idx, oh, ow)];
 
         par_conv.bias_data = &bias[_ocb * jcp.oc_block];
diff --git a/tests/benchdnn/conv/conv_dw_fusion.cpp b/tests/benchdnn/conv/conv_dw_fusion.cpp
index a61851f79ab..2e96b4ec527 100644
--- a/tests/benchdnn/conv/conv_dw_fusion.cpp
+++ b/tests/benchdnn/conv/conv_dw_fusion.cpp
@@ -201,7 +201,9 @@ std::unique_ptr<prb_t> get_fused_conv_prb(const prb_t *prb) {
               << fused_conv_po.dst_dt;
     auto p_dw_cfg = conv::str2cfg(dw_cfg_ss.str().c_str());
 
-    auto stride = fused_conv_po.stride;
+    const auto kernel = fused_conv_po.kernel;
+    const auto stride = fused_conv_po.stride;
+    const auto padding = fused_conv_po.padding;
 
     bool is_3d = prb->ndims >= 5;
     bool is_2d = prb->ndims >= 4;
@@ -213,18 +215,21 @@ std::unique_ptr<prb_t> get_fused_conv_prb(const prb_t *prb) {
     cd.ih = is_2d ? prb->oh : 1;
     cd.iw = prb->ow;
     cd.oc = prb->oc;
-    cd.od = is_3d ? div_up(cd.id, stride) : 1;
-    cd.oh = is_2d ? div_up(cd.ih, stride) : 1;
-    cd.ow = div_up(cd.iw, stride);
-    cd.kd = is_3d ? 3 : 1;
-    cd.kh = is_2d ? 3 : 1;
-    cd.kw = 3;
+    cd.kd = is_3d ? kernel : 1;
+    cd.kh = is_2d ? kernel : 1;
+    cd.kw = kernel;
     cd.sd = is_3d ? stride : 1;
     cd.sh = is_2d ? stride : 1;
     cd.sw = stride;
-    cd.pd = is_3d;
-    cd.ph = is_2d;
-    cd.pw = 1;
+    cd.pd = is_3d ? padding : 0;
+    cd.ph = is_2d ? padding : 0;
+    cd.pw = padding;
+    // Not following the standard convolution formula for output shapes since
+    // the right/bottom padding might be greater than the left/top one.
+    cd.od = is_3d ? div_up(cd.id, stride) : 1;
+    cd.oh = is_2d ? div_up(cd.ih, stride) : 1;
+    cd.ow = div_up(cd.iw, stride);
+
     cd.has_groups = true;
     cd.ndims = prb->ndims;
     cd.init_pad_r(false); // is_deconv = false for conv descriptor
diff --git a/tests/benchdnn/dnn_types.cpp b/tests/benchdnn/dnn_types.cpp
index 34231f358e6..896da5f4be3 100644
--- a/tests/benchdnn/dnn_types.cpp
+++ b/tests/benchdnn/dnn_types.cpp
@@ -298,6 +298,7 @@ static po_table_entry_t kind_table[] = {
         // sum
         {pk_t::SUM, {"sum"}, dnnl_alg_kind_undef},
         // depthwise convolution
+        {pk_t::DW, {"dw"}, dnnl_convolution_auto},
         {pk_t::DW_K3S1P1, {"dw_k3s1p1"}, dnnl_convolution_auto},
         {pk_t::DW_K3S2P1, {"dw_k3s2p1"}, dnnl_convolution_auto},
         // eltwise
@@ -438,21 +439,6 @@ int attr_t::post_ops_t::from_str(const std::string &s) {
     *this = post_ops_t();
     if (s.empty()) return OK;
 
-    // TODO: remove me after a while
-    if (s.front() == '\'' || s.back() == '\'') {
-        BENCHDNN_PRINT(0, "%s\n",
-                "ERROR: `--attr-post-ops` no longer requires opening and "
-                "closing `'` (and `\"` for CLI) quotes. Please discard them to "
-                "proceed with successful parsing.");
-        return FAIL;
-    } else if (s.find_first_of(";", 0) != std::string::npos) {
-        BENCHDNN_PRINT(0, "%s\n",
-                "ERROR: `--attr-post-ops` no longer accepts `;` as post-ops "
-                "delimiter. Please use `+` as a delimiter between several "
-                "post-ops, i.e. `--attr-post-ops=sum+relu`.");
-        return FAIL;
-    }
-
     size_t start_pos = 0;
     while (start_pos != std::string::npos) {
         auto subs = parser::get_substr(s, start_pos, '+');
@@ -481,6 +467,27 @@ int attr_t::post_ops_t::from_str(const std::string &s) {
             // sum dt, if specified, should be defined
             if (e.sum.dt == dnnl_data_type_undef) return FAIL;
         } else if (e.is_convolution_kind()) {
+            if (kind == DW) {
+                // `DW` takes input of the form `dw:kXsYpZ`, while the rest
+                // use the fixed `dw_k3sXp1` forms.
+ const auto str_dw_params + = parser::get_substr(subs, subs_pos, ':'); + size_t pos = 0, idx = 0; + + pos += idx; + if (str_dw_params[pos] != 'k') return FAIL; + e.convolution.kernel = std::stoi(&str_dw_params[++pos], &idx); + + pos += idx; + if (str_dw_params[pos] != 's') return FAIL; + e.convolution.stride = std::stoi(&str_dw_params[++pos], &idx); + + pos += idx; + if (str_dw_params[pos] != 'p') return FAIL; + e.convolution.padding = std::stoi(&str_dw_params[++pos]); + + if (subs_pos == std::string::npos) continue; + } + e.convolution.dst_dt = str2dt(parser::get_substr(subs, subs_pos, ':').c_str()); if (e.convolution.dst_dt == dnnl_data_type_undef) return FAIL; @@ -548,7 +555,7 @@ bool attr_t::post_ops_t::entry_t::is_sum_kind() const { return kind == SUM; } bool attr_t::post_ops_t::entry_t::is_convolution_kind() const { - return kind == DW_K3S1P1 || kind == DW_K3S2P1; + return kind == DW || kind == DW_K3S1P1 || kind == DW_K3S2P1; } bool attr_t::post_ops_t::entry_t::is_eltwise_kind() const { return kind > ELTWISE_START && kind < ELTWISE_END; @@ -709,6 +716,10 @@ std::ostream &operator<<(std::ostream &s, const attr_t::post_ops_t &post_ops) { s << ":" << e.sum.zero_point; if (e.sum.dt != dnnl_data_type_undef) s << ":" << e.sum.dt; } else if (e.is_convolution_kind()) { + if (e.kind == pk_t::DW) { + s << ":k" << e.convolution.kernel << "s" << e.convolution.stride + << "p" << e.convolution.padding; + } const auto &co = e.convolution.oscale; if (e.convolution.dst_dt != dnnl_f32 || !co.is_def()) s << ":" << e.convolution.dst_dt; @@ -963,11 +974,10 @@ dnnl_primitive_attr_t create_dnnl_attr( const auto count = scales ? os_args.get_count(policy) : 0; const auto mask = os_args.get_mask(policy); - const auto dnnl_post_ops_append_dw = e.convolution.stride == 1 - ? dnnl_post_ops_append_dw_k3s1p1 - : dnnl_post_ops_append_dw_k3s2p1; DNN_SAFE_V(dnnl_post_ops_append_dw(ops, wei_dt, bia_dt, - e.convolution.dst_dt, count, mask, scales)); + e.convolution.dst_dt, e.convolution.kernel, + e.convolution.stride, e.convolution.padding, count, + mask, scales)); } else if (e.is_eltwise_kind()) { DNN_SAFE_V(dnnl_post_ops_append_eltwise(ops, e.eltwise.scale, e.eltwise.alg, e.eltwise.alpha, e.eltwise.beta)); diff --git a/tests/benchdnn/dnn_types.hpp b/tests/benchdnn/dnn_types.hpp index 811e33c6dc1..5ec7b463433 100644 --- a/tests/benchdnn/dnn_types.hpp +++ b/tests/benchdnn/dnn_types.hpp @@ -200,6 +200,7 @@ struct attr_t { // sum SUM, // depthwise convolution + DW, DW_K3S1P1, DW_K3S2P1, // eltwise @@ -264,8 +265,12 @@ struct attr_t { } else if (is_eltwise_kind()) { eltwise.alg = kind2dnnl_kind(kind); } else if (is_convolution_kind()) { - convolution.stride = kind == DW_K3S1P1 ? 1 : 2; convolution.oscale = scale_t(); + if (kind != DW) { + convolution.kernel = 3; + convolution.stride = kind == DW_K3S1P1 ? 1 : 2; + convolution.padding = 1; + } } else if (is_binary_kind()) { binary.alg = kind2dnnl_kind(kind); } @@ -284,7 +289,9 @@ struct attr_t { float scale = 1.f; } eltwise; struct { + int kernel = 0; int stride = 0; + int padding = 0; dnnl_data_type_t dst_dt = dnnl_f32; scale_t oscale; } convolution; diff --git a/tests/benchdnn/doc/knobs_attr.md b/tests/benchdnn/doc/knobs_attr.md index 8a99b9b3e95..45526ae940e 100644 --- a/tests/benchdnn/doc/knobs_attr.md +++ b/tests/benchdnn/doc/knobs_attr.md @@ -8,8 +8,7 @@ --attr-zero-points=ARG:POLICY:ZEROPOINT[*][+...] 
--attr-post-ops=SUM[:SCALE[:ZERO_POINT[:DATA_TYPE]]] ELTWISE[:ALPHA[:BETA[:SCALE]]] - DW_K3S1P1[:DST_DT[:OUTPUTSCALE]] - DW_K3S2P1[:DST_DT[:OUTPUTSCALE]] + DW:KkSsPp[:DST_DT[:OUTPUTSCALE]] BINARY:DT[:POLICY[:TAG]] ``` @@ -118,8 +117,8 @@ specified. `SCALE` has same notation and semantics as for `SUM` kind, but requires both `ALPHA` and `BETA` to be specified. `SCALE` is applicable only when output tensor has integer data type. -`DW_K3S1P1` and `DW_K3S2P1` post operation kinds append depthwise convolution -with kernel size of 3, strides of 1 and 2 correspondently and paddings of 1. +`DW:KkSsPp` post operation kind appends depthwise convolution with kernel size +of `k`, stride size of `s`, and left padding size of `p`. These kinds are applicable only for convolution operation with kernel size of 1 as of now. They support optional argument `DST_DT`, which defines destination tensor data type. Refer to [data types](knobs_dt.md) for details. Optional diff --git a/tests/benchdnn/inputs/conv/harness_conv_fused_depthwise b/tests/benchdnn/inputs/conv/harness_conv_fused_depthwise index 3263c136af1..a008db3e571 100644 --- a/tests/benchdnn/inputs/conv/harness_conv_fused_depthwise +++ b/tests/benchdnn/inputs/conv/harness_conv_fused_depthwise @@ -60,3 +60,291 @@ --cfg=u8s8u8 --attr-post-ops=relu:0.5+dw_k3s2p1:s32:per_oc:2.5+relu,dw_k3s2p1:f32:common:2 --batch=shapes_fused_large_src + + +# f32 dw with extended kernels, strides and padding. +--reset +--skip-impl= +--cfg=f32 +--mb=1,2,16 + +# effD1 +--attr-post-ops=dw:k3s1p1 +ic32oc16_ih320oh320kh1sh1dh0ph0_n"effD1_1.1" +ic24oc144_ih160oh160kh1sh1dh0ph0_n"effD1_1.2" +ic80oc480_ih40oh40kh1sh1dh0ph0_n"effD1_1.3" +ic192oc1152_ih20oh20kh1sh1dh0ph0_n"effD1_1.4" +ic320oc1920_ih20oh20kh1sh1dh0ph0_n"effD1_1.5" +ic88oc88_ih10oh10kh1sh1dh0ph0_n"effD1_1.6" +ic88oc88_ih20oh20kh1sh1dh0ph0_n"effD1_1.7" +ic112oc88_ih40oh40kh1sh1dh0ph0_n"effD1_1.8" +ic40oc88_ih80oh80kh1sh1dh0ph0_n"effD1_1.9" +ic88oc88_ih5oh5kh1sh1dh0ph0_n"effD1_1.10" + +--attr-post-ops=dw:k3s2p0 +ic16oc96_ih320oh320kh1sh1dh0ph0_n"effD1_2.1" +ic40oc240_ih80oh80kh1sh1dh0ph0_n"effD1_2.2" + +--attr-post-ops=dw:k5s2p1 +ic24oc144_ih160oh160kh1sh1dh0ph0_n"effD1_3.1" +ic112oc672_ih40oh40kh1sh1dh0ph0_n"effD1_3.2" + +--attr-post-ops=dw:k5s1p2 +ic40oc240_ih80oh80kh1sh1dh0ph0_n"effD1_4.1" +ic80oc480_ih40oh40kh1sh1dh0ph0_n"effD1_4.2" +ic112oc672_ih40oh40kh1sh1dh0ph0_n"effD1_4.3" +ic192oc1152_ih20oh20kh1sh1dh0ph0_n"effD1_4.4" + +# effD4 +--attr-post-ops=dw:k3s1p1 +ic48oc24_ih512oh512kh1sh1dh0ph0_n"effD4_1.1" +ic32oc192_ih256oh256kh1sh1dh0ph0_n"effD4_1.2" +ic112oc672_ih64oh64kh1sh1dh0ph0_n"effD4_1.3" +ic272oc1632_ih32oh32kh1sh1dh0ph0_n"effD4_1.4" +ic448oc2688_ih32oh32kh1sh1dh0ph0_n"effD4_1.5" +ic448oc224_ih8oh8kh1sh1dh0ph0_n"effD4_1.6" +ic448oc224_ih16oh16kh1sh1dh0ph0_n"effD4_1.7" +ic448oc224_ih32oh32kh1sh1dh0ph0_n"effD4_1.8" +ic448oc224_ih64oh64kh1sh1dh0ph0_n"effD4_1.9" +ic448oc224_ih128oh128kh1sh1dh0ph0_n"effD4_1.10" + +--attr-post-ops=dw:k3s2p0 +ic24oc144_ih512oh512kh1sh1dh0ph0_n"effD4_2.1" +ic56oc336_ih128oh128kh1sh1dh0ph0_n"effD4_2.2" + +--attr-post-ops=dw:k5s2p1 +ic32oc192_ih256oh256kh1sh1dh0ph0_n"effD4_3.1" +ic160oc960_ih64oh64kh1sh1dh0ph0_n"effD4_3.2" + +--attr-post-ops=dw:k5s1p2 +ic56oc336_ih128oh128kh1sh1dh0ph0_n"effD4_4.1" +ic112oc672_ih64oh64kh1sh1dh0ph0_n"effD4_4.2" +ic160oc960_ih64oh64kh1sh1dh0ph0_n"effD4_4.3" +ic272oc1632_ih32oh32kh1sh1dh0ph0_n"effD4_4.4" + +# faster_rcnn_nas_lowproposals_coco +--attr-post-ops=dw:k3s1p1 +ic42oc42_ih300oh300kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_1.1" 
+ic168oc168_ih150oh150kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_1.2" +ic84oc84_ih150oh150kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_1.3" +ic336oc336_ih75oh75kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_1.4" +ic672oc672_ih9oh9kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_1.5" + +--attr-post-ops=dw:k5s1p2 +ic42oc42_ih300oh300kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_2.1" +ic84oc84_ih150oh150kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_2.2" +ic168oc168_ih150oh150kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_2.3" +ic336oc336_ih75oh75kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_2.4" +ic672oc672_ih9oh9kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_2.5" + +--attr-post-ops=dw:k7s1p3 +ic96oc42_ih300oh300kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_3.1" +ic84oc84_ih150oh150kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_3.2" +ic336oc336_ih75oh75kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_3.3" +ic672oc672_ih9oh9kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_3.4" + +# deeplab +--attr-post-ops=dw:k3s1p0 +ic64oc384_ih129oh129kh1sh1dh0ph0_n"deeplab_1.1" +ic96oc576_ih129oh129kh1sh1dh0ph0_n"deeplab_1.2" +ic160oc960_ih129oh129kh1sh1dh0ph0_n"deeplab_1.3" + +--attr-post-ops=dw:k3s1p1 +ic24oc144_ih257oh257kh1sh1dh0ph0_n"deeplab_2.1" +ic32oc192_ih129oh129kh1sh1dh0ph0_n"deeplab_2.2" + +--attr-post-ops=dw:k3s2p1 +ic16oc96_ih513oh513kh1sh1dh0ph0_n"deeplab_3.1" +ic24oc144_ih257oh257kh1sh1dh0ph0_n"deeplab_3.2" + +# deeplab_v3 +--attr-post-ops=dw:k3s1p0 +ic64oc384_ih65oh65kh1sh1dh0ph0_n"deeplab_v3_1.1" +ic96oc576_ih65oh65kh1sh1dh0ph0_n"deeplab_v3_1.2" +ic160oc960_ih65oh65kh1sh1dh0ph0_n"deeplab_v3_1.3" + +--attr-post-ops=dw:k3s1p1 +ic24oc144_ih129oh129kh1sh1dh0ph0_n"deeplab_v3_2.1" +ic32oc192_ih65oh65kh1sh1dh0ph0_n"deeplab_v3_2.2" + +--attr-post-ops=dw:k3s2p1 +ic16oc96_ih257oh257kh1sh1dh0ph0_n"deeplab_v3_3.1" +ic24oc144_ih129oh129kh1sh1dh0ph0_n"deeplab_v3_3.2" + +# rmnet_ssd +--attr-post-ops=dw:k3s1p1 +ic32oc8_ih200oh200kh1sh1dh0ph0_n"rmnet_ssd_1.1" +ic64oc16_ih100oh100kh1sh1dh0ph0_n"rmnet_ssd_1.2" +ic128oc32_ih50oh50kh1sh1dh0ph0_n"rmnet_ssd_1.3" +ic256oc64_ih25oh25kh1sh1dh0ph0_n"rmnet_ssd_1.4" +ic128oc128_ih50oh50kh1sh1dh0ph0_n"rmnet_ssd_1.5" +ic256oc256_ih25oh25kh1sh1dh0ph0_n"rmnet_ssd_1.6" + +--attr-post-ops=dw:k3s2p0 +ic32oc16_ih200oh200kh1sh1dh0ph0_n"rmnet_ssd_2.1" +ic64oc32_ih100oh100kh1sh1dh0ph0_n"rmnet_ssd_2.2" +ic128oc64_ih50oh50kh1sh1dh0ph0_n"rmnet_ssd_2.3" + +# nasnet_a_large_331 +--attr-post-ops=dw:k3s1p1 +ic42oc42_ih83oh83kh1sh1dh0ph0_n"nasnet_a_large_331_1.1" +ic168oc168_ih42oh42kh1sh1dh0ph0_n"nasnet_a_large_331_1.2" +ic336oc336_ih21oh21kh1sh1dh0ph0_n"nasnet_a_large_331_1.3" +ic672oc672_ih11oh11kh1sh1dh0ph0_n"nasnet_a_large_331_1.4" + +--attr-post-ops=dw:k5s1p1 +ic42oc42_ih83oh83kh1sh1dh0ph0_n"nasnet_a_large_331_2.1" +ic84oc84_ih42oh42kh1sh1dh0ph0_n"nasnet_a_large_331_2.2" +ic168oc168_ih42oh42kh1sh1dh0ph0_n"nasnet_a_large_331_2.3" +ic336oc336_ih21oh21kh1sh1dh0ph0_n"nasnet_a_large_331_2.4" +ic672oc672_ih11oh11kh1sh1dh0ph0_n"nasnet_a_large_331_2.5" + +--attr-post-ops=dw:k7s1p3 +ic96oc42_ih83oh83kh1sh1dh0ph0_n"nasnet_a_large_331_3.1" +ic84oc84_ih42oh42kh1sh1dh0ph0_n"nasnet_a_large_331_3.2" +ic336oc336_ih21oh21kh1sh1dh0ph0_n"nasnet_a_large_331_3.3" +ic672oc672_ih11oh11kh1sh1dh0ph0_n"nasnet_a_large_331_3.4" + + +# bf16 dw with extended kernels, strides and padding. 
+--reset +--skip-impl= +--cfg=bf16bf16bf16 +--mb=1,2,16 + +# effD1 +--attr-post-ops=dw:k3s1p1:bf16 +ic32oc16_ih320oh320kh1sh1dh0ph0_n"effD1_1.1" +ic24oc144_ih160oh160kh1sh1dh0ph0_n"effD1_1.2" +ic80oc480_ih40oh40kh1sh1dh0ph0_n"effD1_1.3" +ic192oc1152_ih20oh20kh1sh1dh0ph0_n"effD1_1.4" +ic320oc1920_ih20oh20kh1sh1dh0ph0_n"effD1_1.5" +ic88oc88_ih10oh10kh1sh1dh0ph0_n"effD1_1.6" +ic88oc88_ih20oh20kh1sh1dh0ph0_n"effD1_1.7" +ic112oc88_ih40oh40kh1sh1dh0ph0_n"effD1_1.8" +ic40oc88_ih80oh80kh1sh1dh0ph0_n"effD1_1.9" +ic88oc88_ih5oh5kh1sh1dh0ph0_n"effD1_1.10" + +--attr-post-ops=dw:k3s2p0:bf16 +ic16oc96_ih320oh320kh1sh1dh0ph0_n"effD1_2.1" +ic40oc240_ih80oh80kh1sh1dh0ph0_n"effD1_2.2" + +--attr-post-ops=dw:k5s2p1:bf16 +ic24oc144_ih160oh160kh1sh1dh0ph0_n"effD1_3.1" +ic112oc672_ih40oh40kh1sh1dh0ph0_n"effD1_3.2" + +--attr-post-ops=dw:k5s1p2:bf16 +ic40oc240_ih80oh80kh1sh1dh0ph0_n"effD1_4.1" +ic80oc480_ih40oh40kh1sh1dh0ph0_n"effD1_4.2" +ic112oc672_ih40oh40kh1sh1dh0ph0_n"effD1_4.3" +ic192oc1152_ih20oh20kh1sh1dh0ph0_n"effD1_4.4" + +# effD4 +--attr-post-ops=dw:k3s1p1:bf16 +ic48oc24_ih512oh512kh1sh1dh0ph0_n"effD4_1.1" +ic32oc192_ih256oh256kh1sh1dh0ph0_n"effD4_1.2" +ic112oc672_ih64oh64kh1sh1dh0ph0_n"effD4_1.3" +ic272oc1632_ih32oh32kh1sh1dh0ph0_n"effD4_1.4" +ic448oc2688_ih32oh32kh1sh1dh0ph0_n"effD4_1.5" +ic448oc224_ih8oh8kh1sh1dh0ph0_n"effD4_1.6" +ic448oc224_ih16oh16kh1sh1dh0ph0_n"effD4_1.7" +ic448oc224_ih32oh32kh1sh1dh0ph0_n"effD4_1.8" +ic448oc224_ih64oh64kh1sh1dh0ph0_n"effD4_1.9" +ic448oc224_ih128oh128kh1sh1dh0ph0_n"effD4_1.10" + +--attr-post-ops=dw:k3s2p0:bf16 +ic24oc144_ih512oh512kh1sh1dh0ph0_n"effD4_2.1" +ic56oc336_ih128oh128kh1sh1dh0ph0_n"effD4_2.2" + +--attr-post-ops=dw:k5s2p1:bf16 +ic32oc192_ih256oh256kh1sh1dh0ph0_n"effD4_3.1" +ic160oc960_ih64oh64kh1sh1dh0ph0_n"effD4_3.2" + +--attr-post-ops=dw:k5s1p2:bf16 +ic56oc336_ih128oh128kh1sh1dh0ph0_n"effD4_4.1" +ic112oc672_ih64oh64kh1sh1dh0ph0_n"effD4_4.2" +ic160oc960_ih64oh64kh1sh1dh0ph0_n"effD4_4.3" +ic272oc1632_ih32oh32kh1sh1dh0ph0_n"effD4_4.4" + +# faster_rcnn_nas_lowproposals_coco +--attr-post-ops=dw:k3s1p1:bf16 +ic42oc42_ih300oh300kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_1.1" +ic168oc168_ih150oh150kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_1.2" +ic84oc84_ih150oh150kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_1.3" +ic336oc336_ih75oh75kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_1.4" +ic672oc672_ih9oh9kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_1.5" + +--attr-post-ops=dw:k5s1p2:bf16 +ic42oc42_ih300oh300kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_2.1" +ic84oc84_ih150oh150kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_2.2" +ic168oc168_ih150oh150kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_2.3" +ic336oc336_ih75oh75kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_2.4" +ic672oc672_ih9oh9kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_2.5" + +--attr-post-ops=dw:k7s1p3:bf16 +ic96oc42_ih300oh300kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_3.1" +ic84oc84_ih150oh150kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_3.2" +ic336oc336_ih75oh75kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_3.3" +ic672oc672_ih9oh9kh1sh1dh0ph0_n"faster_rcnn_nas_lowproposals_coco_3.4" + +# deeplab +--attr-post-ops=dw:k3s1p0:bf16 +ic64oc384_ih129oh129kh1sh1dh0ph0_n"deeplab_1.1" +ic96oc576_ih129oh129kh1sh1dh0ph0_n"deeplab_1.2" +ic160oc960_ih129oh129kh1sh1dh0ph0_n"deeplab_1.3" + +--attr-post-ops=dw:k3s1p1:bf16 +ic24oc144_ih257oh257kh1sh1dh0ph0_n"deeplab_2.1" +ic32oc192_ih129oh129kh1sh1dh0ph0_n"deeplab_2.2" + +--attr-post-ops=dw:k3s2p1:bf16 
+ic16oc96_ih513oh513kh1sh1dh0ph0_n"deeplab_3.1" +ic24oc144_ih257oh257kh1sh1dh0ph0_n"deeplab_3.2" + +# deeplab_v3 +--attr-post-ops=dw:k3s1p0:bf16 +ic64oc384_ih65oh65kh1sh1dh0ph0_n"deeplab_v3_1.1" +ic96oc576_ih65oh65kh1sh1dh0ph0_n"deeplab_v3_1.2" +ic160oc960_ih65oh65kh1sh1dh0ph0_n"deeplab_v3_1.3" + +--attr-post-ops=dw:k3s1p1:bf16 +ic24oc144_ih129oh129kh1sh1dh0ph0_n"deeplab_v3_2.1" +ic32oc192_ih65oh65kh1sh1dh0ph0_n"deeplab_v3_2.2" + +--attr-post-ops=dw:k3s2p1:bf16 +ic16oc96_ih257oh257kh1sh1dh0ph0_n"deeplab_v3_3.1" +ic24oc144_ih129oh129kh1sh1dh0ph0_n"deeplab_v3_3.2" + +# rmnet_ssd +--attr-post-ops=dw:k3s1p1:bf16 +ic32oc8_ih200oh200kh1sh1dh0ph0_n"rmnet_ssd_1.1" +ic64oc16_ih100oh100kh1sh1dh0ph0_n"rmnet_ssd_1.2" +ic128oc32_ih50oh50kh1sh1dh0ph0_n"rmnet_ssd_1.3" +ic256oc64_ih25oh25kh1sh1dh0ph0_n"rmnet_ssd_1.4" +ic128oc128_ih50oh50kh1sh1dh0ph0_n"rmnet_ssd_1.5" +ic256oc256_ih25oh25kh1sh1dh0ph0_n"rmnet_ssd_1.6" + +--attr-post-ops=dw:k3s2p0:bf16 +ic32oc16_ih200oh200kh1sh1dh0ph0_n"rmnet_ssd_2.1" +ic64oc32_ih100oh100kh1sh1dh0ph0_n"rmnet_ssd_2.2" +ic128oc64_ih50oh50kh1sh1dh0ph0_n"rmnet_ssd_2.3" + +# nasnet_a_large_331 +--attr-post-ops=dw:k3s1p1:bf16 +ic42oc42_ih83oh83kh1sh1dh0ph0_n"nasnet_a_large_331_1.1" +ic168oc168_ih42oh42kh1sh1dh0ph0_n"nasnet_a_large_331_1.2" +ic336oc336_ih21oh21kh1sh1dh0ph0_n"nasnet_a_large_331_1.3" +ic672oc672_ih11oh11kh1sh1dh0ph0_n"nasnet_a_large_331_1.4" + +--attr-post-ops=dw:k5s1p1:bf16 +ic42oc42_ih83oh83kh1sh1dh0ph0_n"nasnet_a_large_331_2.1" +ic84oc84_ih42oh42kh1sh1dh0ph0_n"nasnet_a_large_331_2.2" +ic168oc168_ih42oh42kh1sh1dh0ph0_n"nasnet_a_large_331_2.3" +ic336oc336_ih21oh21kh1sh1dh0ph0_n"nasnet_a_large_331_2.4" +ic672oc672_ih11oh11kh1sh1dh0ph0_n"nasnet_a_large_331_2.5" + +--attr-post-ops=dw:k7s1p3:bf16 +ic96oc42_ih83oh83kh1sh1dh0ph0_n"nasnet_a_large_331_3.1" +ic84oc84_ih42oh42kh1sh1dh0ph0_n"nasnet_a_large_331_3.2" +ic336oc336_ih21oh21kh1sh1dh0ph0_n"nasnet_a_large_331_3.3" +ic672oc672_ih11oh11kh1sh1dh0ph0_n"nasnet_a_large_331_3.4" diff --git a/tests/benchdnn/utils/parser.cpp b/tests/benchdnn/utils/parser.cpp index c17a20f788f..f46158bd17d 100644 --- a/tests/benchdnn/utils/parser.cpp +++ b/tests/benchdnn/utils/parser.cpp @@ -170,7 +170,7 @@ bool parse_attr_post_ops(std::vector &po, const char *str, = "POST-OPS\n Specifies post-ops attribute. 
`POST-OPS` syntax " "is one of those:\n * SUM[:SCALE[:ZERO_POINT[:DATA_TYPE]]]\n " " * ELTWISE[:ALPHA[:BETA[:SCALE]]]\n * " - "DW_K3S1P1[:DST_DT[:OUTPUTSCALE]]\n * " + "DW:KkSsPp[:DST_DT[:OUTPUTSCALE]]\n * " "BINARY:DT[:POLICY[:TAG]]\n More details at " "https://github.com/oneapi-src/oneDNN/blob/master/tests/benchdnn/" "doc/knobs_attr.md\n"; diff --git a/tests/gtests/test_iface_attr.cpp b/tests/gtests/test_iface_attr.cpp index a301600f9a6..42bbbddd777 100644 --- a/tests/gtests/test_iface_attr.cpp +++ b/tests/gtests/test_iface_attr.cpp @@ -462,6 +462,26 @@ HANDLE_EXCEPTIONS_FOR_TEST_F(attr_test_t, DepthwiseFusionPostop) { ASSERT_EQ(dst_dt, memory::data_type::f32); ASSERT_EQ(scales_mask, 0); ASSERT_EQ(scales_in, scales_out); + + scales_in = {}; + ops.append_dw(memory::data_type::s8, memory::data_type::f32, + memory::data_type::u8, 5, 2, 1, 0, scales_in); + attr.set_post_ops(ops); + + ASSERT_EQ(attr.get_post_ops().kind(3), primitive::kind::convolution); + + memory::dim kernel, stride, padding; + attr.get_post_ops().get_params_dw(3, wei_dt, bias_dt, dst_dt, kernel, + stride, padding, scales_mask, scales_out); + + ASSERT_EQ(wei_dt, memory::data_type::s8); + ASSERT_EQ(bias_dt, memory::data_type::f32); + ASSERT_EQ(dst_dt, memory::data_type::u8); + ASSERT_EQ(kernel, 5); + ASSERT_EQ(stride, 2); + ASSERT_EQ(padding, 1); + ASSERT_EQ(scales_mask, 0); + ASSERT_EQ(scales_in, scales_out); } HANDLE_EXCEPTIONS_FOR_TEST_F(attr_test_t, DepthwiseFusion) {
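For completeness, a minimal sketch of the same round-trip through the new C functions added to `dnnl.h`; the data types and geometry below are illustrative assumptions that mirror the gtest above, not requirements of the API:

```cpp
#include "oneapi/dnnl/dnnl.h"

int main() {
    dnnl_post_ops_t ops;
    if (dnnl_post_ops_create(&ops) != dnnl_success) return 1;

    // kernel = 5, stride = 2, left padding = 1; no output scales
    // (count = 0, mask = 0, scales = nullptr).
    if (dnnl_post_ops_append_dw(ops, dnnl_s8, dnnl_f32, dnnl_u8,
                /*kernel_size=*/5, /*stride_size=*/2, /*padding_l_size=*/1,
                /*count=*/0, /*mask=*/0, /*scales=*/nullptr)
            != dnnl_success)
        return 1;

    // Query the parameters back through the generalized getter.
    dnnl_data_type_t wei_dt, bias_dt, dst_dt;
    dnnl_dim_t kernel, stride, padding, count;
    int mask;
    const float *scales;
    if (dnnl_post_ops_get_params_dw(ops, 0, &wei_dt, &bias_dt, &dst_dt,
                &kernel, &stride, &padding, &count, &mask, &scales)
            != dnnl_success)
        return 1;

    dnnl_post_ops_destroy(ops);
    return 0;
}
```

Under the updated benchdnn syntax, the matching knob for this configuration would be `--attr-post-ops=dw:k5s2p1:u8`.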