From 80f668c937923073dd504668230cf7221a2048b2 Mon Sep 17 00:00:00 2001
From: skyleaworlder <870033938@qq.com>
Date: Tue, 24 Jan 2023 13:52:43 +0000
Subject: [PATCH 1/6] add: LPNormPool

---
 Project.toml        |  2 +-
 src/Flux.jl         |  2 +-
 src/layers/conv.jl  | 98 +++++++++++++++++++++++++++++++++++++++++++++
 test/layers/conv.jl |  4 ++
 4 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 8292de97e2..da30569e78 100644
--- a/Project.toml
+++ b/Project.toml
@@ -30,7 +30,7 @@ ChainRulesCore = "1.12"
 Functors = "0.3, 0.4"
 MLUtils = "0.2, 0.3.1, 0.4"
 MacroTools = "0.5"
-NNlib = "0.8.15"
+NNlib = "0.8.17"
 NNlibCUDA = "0.2.6"
 OneHotArrays = "0.1, 0.2"
 Optimisers = "0.2.12"
diff --git a/src/Flux.jl b/src/Flux.jl
index afa47f6fc0..23954c8e17 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -21,7 +21,7 @@ Optimisers.base(dx::Zygote.Grads) = error("Optimisers.jl cannot be used with Zyg
 export Chain, Dense, Embedding, Maxout, SkipConnection, Parallel, PairwiseFusion,
        RNN, LSTM, GRU, GRUv3,
        SamePad, Conv, CrossCor, ConvTranspose, DepthwiseConv,
-       AdaptiveMaxPool, AdaptiveMeanPool, GlobalMaxPool, GlobalMeanPool, MaxPool, MeanPool,
+       AdaptiveMaxPool, AdaptiveMeanPool, GlobalMaxPool, GlobalMeanPool, GlobalLPNormPool, MaxPool, MeanPool, LPNormPool,
        Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm,
        Upsample, PixelShuffle,
        fmap, cpu, gpu, f32, f64, rand32, randn32, zeros32, ones32,
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 003395c15d..42c1a34fde 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -633,6 +633,40 @@ function Base.show(io::IO, g::GlobalMeanPool)
   print(io, "GlobalMeanPool()")
 end
 
+"""
+    GlobalLPNormPool
+
+Global lp norm pooling layer.
+
+Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output,
+by performing lp norm pooling on the complete (w,h)-shaped feature maps.
+
+See also [`LPNormPool`](@ref).
+
+```jldoctest
+julia> xs = rand(Float32, 100, 100, 3, 50)
+
+julia> m = Chain(Conv((3,3), 3 => 7), GlobalLPNormPool())
+
+julia> m(xs) |> size
+(1, 1, 7, 50)
+```
+"""
+struct GlobalLPNormPool
+  p::Number
+end
+
+function (g::GlobalLPNormPool)(x)
+  x_size = size(x)
+  k = x_size[1:end-2]
+  pdims = PoolDims(x, k)
+  return lpnormpool(x, pdims; p=g.p)
+end
+
+function Base.show(io::IO, g::GlobalLPNormPool)
+  print(io, "GlobalLPNormPool(p=", g.p, ")")
+end
+
 """
     MaxPool(window::NTuple; pad=0, stride=window)
@@ -754,3 +788,67 @@ function Base.show(io::IO, m::MeanPool)
   m.stride == m.k || print(io, ", stride=", _maybetuple_string(m.stride))
   print(io, ")")
 end
+
+"""
+    LPNormPool(window::NTuple, p::Number; pad=0, stride=window)
+
+Lp norm pooling layer, calculating the p-norm of each window,
+also known as LPPool in PyTorch.
+
+Expects as input an array with `ndims(x) == N+2`, i.e. channel and
+batch dimensions, after the `N` feature dimensions, where `N = length(window)`.
+
+By default the window size is also the stride in each dimension.
+The keyword `pad` accepts the same options as for the `Conv` layer,
+including `SamePad()`.
+
+See also [`Conv`](@ref), [`MaxPool`](@ref), [`GlobalLPNormPool`](@ref).
+
+# Examples
+
+```jldoctest
+julia> xs = rand(Float32, 100, 100, 3, 50);
+
+julia> m = Chain(Conv((5,5), 3 => 7), LPNormPool((5,5), 2; pad=SamePad()))
+Chain(
+  Conv((5, 5), 3 => 7),  # 532 parameters
+  LPNormPool((5, 5), p=2, pad=2),
+)
+
+julia> m[1](xs) |> size
+(96, 96, 7, 50)
+
+julia> m(xs) |> size
+(20, 20, 7, 50)
+
+julia> layer = LPNormPool((5,), 2, pad=2, stride=(3,))  # one-dimensional window
+LPNormPool((5,), p=2, pad=2, stride=3)
+
+julia> layer(rand(Float32, 100, 7, 50)) |> size
+(34, 7, 50)
+```
+"""
+struct LPNormPool{N,M}
+  k::NTuple{N,Int}
+  p::Number
+  pad::NTuple{M,Int}
+  stride::NTuple{N,Int}
+end
+
+function LPNormPool(k::NTuple{N,Integer}, p::Number; pad = 0, stride = k) where N
+  stride = expand(Val(N), stride)
+  pad = calc_padding(LPNormPool, pad, k, 1, stride)
+  return LPNormPool(k, p, pad, stride)
+end
+
+function (l::LPNormPool)(x)
+  pdims = PoolDims(x, l.k; padding=l.pad, stride=l.stride)
+  return lpnormpool(x, pdims; p=l.p)
+end
+
+function Base.show(io::IO, l::LPNormPool)
+  print(io, "LPNormPool(", l.k, ", p=", l.p)
+  all(==(0), l.pad) || print(io, ", pad=", _maybetuple_string(l.pad))
+  l.stride == l.k || print(io, ", stride=", _maybetuple_string(l.stride))
+  print(io, ")")
+end
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index 019f3fd603..6acfce8f30 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -17,10 +17,14 @@ using Flux: gradient
   @test size(gmp(x)) == (1, 1, 3, 2)
   gmp = GlobalMeanPool()
   @test size(gmp(x)) == (1, 1, 3, 2)
+  glmp = GlobalLPNormPool(2)
+  @test size(glmp(x)) == (1, 1, 3, 2)
   mp = MaxPool((2, 2))
   @test mp(x) == maxpool(x, PoolDims(x, 2))
   mp = MeanPool((2, 2))
   @test mp(x) == meanpool(x, PoolDims(x, 2))
+  lnp = LPNormPool((2,2), 2)
+  @test lnp(x) == lpnormpool(x, PoolDims(x, 2); p=2)
 end
 
 @testset "CNN" begin

From bddbb4beefc349d7a70909c816ee5226e4e284e7 Mon Sep 17 00:00:00 2001
From: skyleaworlder <870033938@qq.com>
Date: Wed, 25 Jan 2023 16:05:36 +0000
Subject: [PATCH 2/6] fix: jldoctest error; Number => Float64; remove useless
 `show` overloads

---
 src/layers/conv.jl  | 33 +++++++++++----------------------
 test/layers/conv.jl |  6 +++---
 2 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 42c1a34fde..06b508d1f4 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -595,10 +595,6 @@ function (g::GlobalMaxPool)(x)
   return maxpool(x, pdims)
 end
 
-function Base.show(io::IO, g::GlobalMaxPool)
-  print(io, "GlobalMaxPool()")
-end
-
 """
     GlobalMeanPool()
 
@@ -629,12 +625,8 @@ function (g::GlobalMeanPool)(x)
   return meanpool(x, pdims)
 end
 
-function Base.show(io::IO, g::GlobalMeanPool)
-  print(io, "GlobalMeanPool()")
-end
-
 """
-    GlobalLPNormPool
+    GlobalLPNormPool(p::Float64)
 
 Global lp norm pooling layer.
 
 Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output,
 by performing lp norm pooling on the complete (w,h)-shaped feature maps.
 
 See also [`LPNormPool`](@ref).
 
 ```jldoctest
 julia> xs = rand(Float32, 100, 100, 3, 50)
 
-julia> m = Chain(Conv((3,3), 3 => 7), GlobalLPNormPool())
+julia> m = Chain(Conv((3,3), 3 => 7), GlobalLPNormPool(2.0))
 
 julia> m(xs) |> size
 (1, 1, 7, 50)
 ```
 """
 struct GlobalLPNormPool
-  p::Number
+  p::Float64
 end
 
 function (g::GlobalLPNormPool)(x)
   x_size = size(x)
   k = x_size[1:end-2]
   pdims = PoolDims(x, k)
   return lpnormpool(x, pdims; p=g.p)
 end
 
-function Base.show(io::IO, g::GlobalLPNormPool)
-  print(io, "GlobalLPNormPool(p=", g.p, ")")
-end
-
 """
     MaxPool(window::NTuple; pad=0, stride=window)
@@ -754,7 +778,7 @@ function Base.show(io::IO, m::MeanPool)
 end
 
 """
-    LPNormPool(window::NTuple, p::Number; pad=0, stride=window)
+    LPNormPool(window::NTuple, p::Float64; pad=0, stride=window)
 
 Lp norm pooling layer, calculating the p-norm of each window,
 also known as LPPool in PyTorch.
 
 Expects as input an array with `ndims(x) == N+2`, i.e. channel and
 batch dimensions, after the `N` feature dimensions, where `N = length(window)`.
 
 By default the window size is also the stride in each dimension.
 The keyword `pad` accepts the same options as for the `Conv` layer,
 including `SamePad()`.
 
-See also [`Conv`](@ref), [`MaxPool`](@ref), [`GlobalLPNormPool`](@ref).
+See also [`Conv`](@ref), [`MaxPool`](@ref), [`GlobalLPNormPool`](@ref),
+and PyTorch's [`LPPool2d`](https://pytorch.org/docs/stable/generated/torch.nn.LPPool2d.html).
 
 # Examples
 
 ```jldoctest
 julia> xs = rand(Float32, 100, 100, 3, 50);
 
-julia> m = Chain(Conv((5,5), 3 => 7), LPNormPool((5,5), 2; pad=SamePad()))
+julia> m = Chain(Conv((5,5), 3 => 7), LPNormPool((5,5), 2.0; pad=SamePad()))
 Chain(
   Conv((5, 5), 3 => 7),  # 532 parameters
   LPNormPool((5, 5), p=2, pad=2),
 )
 
 julia> m[1](xs) |> size
 (96, 96, 7, 50)
 
 julia> m(xs) |> size
 (20, 20, 7, 50)
 
-julia> layer = LPNormPool((5,), 2, pad=2, stride=(3,))  # one-dimensional window
+julia> layer = LPNormPool((5,), 2.0, pad=2, stride=(3,))  # one-dimensional window
 LPNormPool((5,), p=2, pad=2, stride=3)
 
 julia> layer(rand(Float32, 100, 7, 50)) |> size
 (34, 7, 50)
 ```
 """
 struct LPNormPool{N,M}
   k::NTuple{N,Int}
-  p::Number
+  p::Float64
   pad::NTuple{M,Int}
   stride::NTuple{N,Int}
 end
 
-function LPNormPool(k::NTuple{N,Integer}, p::Number; pad = 0, stride = k) where N
+function LPNormPool(k::NTuple{N,Integer}, p::Float64; pad = 0, stride = k) where N
   stride = expand(Val(N), stride)
   pad = calc_padding(LPNormPool, pad, k, 1, stride)
   return LPNormPool(k, p, pad, stride)
 end
 
 function (l::LPNormPool)(x)
   pdims = PoolDims(x, l.k; padding=l.pad, stride=l.stride)
   return lpnormpool(x, pdims; p=l.p)
 end
 
 function Base.show(io::IO, l::LPNormPool)
-  print(io, "LPNormPool(", l.k, ", p=", l.p)
+  print(io, "LPNormPool(", l.k, ", ", l.p)
   all(==(0), l.pad) || print(io, ", pad=", _maybetuple_string(l.pad))
   l.stride == l.k || print(io, ", stride=", _maybetuple_string(l.stride))
   print(io, ")")
 end
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index 6acfce8f30..213d0198a0 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -17,14 +17,14 @@ using Flux: gradient
   @test size(gmp(x)) == (1, 1, 3, 2)
   gmp = GlobalMeanPool()
   @test size(gmp(x)) == (1, 1, 3, 2)
-  glmp = GlobalLPNormPool(2)
+  glmp = GlobalLPNormPool(2.0)
   @test size(glmp(x)) == (1, 1, 3, 2)
   mp = MaxPool((2, 2))
   @test mp(x) == maxpool(x, PoolDims(x, 2))
   mp = MeanPool((2, 2))
   @test mp(x) == meanpool(x, PoolDims(x, 2))
-  lnp = LPNormPool((2,2), 2)
-  @test lnp(x) == lpnormpool(x, PoolDims(x, 2); p=2)
+  lnp = LPNormPool((2,2), 2.0)
+  @test lnp(x) == lpnormpool(x, PoolDims(x, 2); p=2.0)
 end
 
 @testset "CNN" begin
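Note: the NNlib bound is raised to 0.8.17 in PATCH 1/6, presumably the first release providing `lpnormpool`. Each output entry is the p-norm of one pooling window, i.e. `(sum(x_i^p))^(1/p)`. A minimal hand-check of that definition on a 1-D input; the shapes and the `using` line here are illustrative assumptions, not part of the patches:

```julia
using NNlib: lpnormpool, PoolDims

x = reshape(Float32[1, 2, 3, 4], 4, 1, 1)  # (width, channels, batch)
pdims = PoolDims(x, 2)                     # window 2; stride defaults to the window

y = lpnormpool(x, pdims; p = 2.0)          # size (2, 1, 1)
y[1, 1, 1] ≈ sqrt(1f0^2 + 2f0^2)           # first window:  (1^2 + 2^2)^(1/2) ≈ 2.236
y[2, 1, 1] ≈ sqrt(3f0^2 + 4f0^2)           # second window: (3^2 + 4^2)^(1/2) = 5.0
```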
From 85a6effbb2cc3a46d9795cc141fcb96097cf6e9f Mon Sep 17 00:00:00 2001
From: skyleaworlder <870033938@qq.com>
Date: Wed, 25 Jan 2023 16:37:10 +0000
Subject: [PATCH 3/6] fix: concrete type Number => parameterization

---
 src/layers/conv.jl | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 06b508d1f4..f1a431a124 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -626,7 +626,7 @@ function (g::GlobalMeanPool)(x)
 end
 
 """
-    GlobalLPNormPool(p::Float64)
+    GlobalLPNormPool(p::T)
 
 Global lp norm pooling layer.
 
@@ -636,16 +636,16 @@ by performing lp norm pooling on the complete (w,h)-shaped feature maps.
 See also [`LPNormPool`](@ref).
 
 ```jldoctest
-julia> xs = rand(Float32, 100, 100, 3, 50)
+julia> xs = rand(Float32, 100, 100, 3, 50);
 
-julia> m = Chain(Conv((3,3), 3 => 7), GlobalLPNormPool(2.0))
+julia> m = Chain(Conv((3,3), 3 => 7), GlobalLPNormPool(2.0));
 
 julia> m(xs) |> size
 (1, 1, 7, 50)
 ```
 """
-struct GlobalLPNormPool
-  p::Float64
+struct GlobalLPNormPool{T<:Number}
+  p::T
 end
 
 function (g::GlobalLPNormPool)(x)
@@ -778,7 +778,7 @@ function Base.show(io::IO, m::MeanPool)
 end
 
 """
-    LPNormPool(window::NTuple, p::Float64; pad=0, stride=window)
+    LPNormPool(window::NTuple, p::T; pad=0, stride=window)
 
 Lp norm pooling layer, calculating the p-norm of each window,
 also known as LPPool in PyTorch.
 
@@ -801,7 +801,7 @@ julia> xs = rand(Float32, 100, 100, 3, 50);
 julia> m = Chain(Conv((5,5), 3 => 7), LPNormPool((5,5), 2.0; pad=SamePad()))
 Chain(
   Conv((5, 5), 3 => 7),  # 532 parameters
-  LPNormPool((5, 5), p=2, pad=2),
+  LPNormPool((5, 5), 2.0, pad=2),
 )
 
 julia> m[1](xs) |> size
@@ -811,20 +811,20 @@ julia> m(xs) |> size
 (20, 20, 7, 50)
 
 julia> layer = LPNormPool((5,), 2.0, pad=2, stride=(3,))  # one-dimensional window
-LPNormPool((5,), p=2, pad=2, stride=3)
+LPNormPool((5,), 2.0, pad=2, stride=3)
 
 julia> layer(rand(Float32, 100, 7, 50)) |> size
 (34, 7, 50)
 ```
 """
-struct LPNormPool{N,M}
+struct LPNormPool{N,M,T<:Number}
   k::NTuple{N,Int}
-  p::Float64
+  p::T
   pad::NTuple{M,Int}
   stride::NTuple{N,Int}
 end
 
-function LPNormPool(k::NTuple{N,Integer}, p::Float64; pad = 0, stride = k) where N
+function LPNormPool(k::NTuple{N,Integer}, p::T; pad = 0, stride = k) where {N,T}
   stride = expand(Val(N), stride)
   pad = calc_padding(LPNormPool, pad, k, 1, stride)
   return LPNormPool(k, p, pad, stride)
 end
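Note: the motivation for PATCH 3/6 is the usual Julia performance guideline: a field typed as the abstract `Number` defeats method specialization, while a type parameter keeps instances fully concrete. A standalone sketch of the difference; these struct names are hypothetical, not from the patch:

```julia
struct LooselyTypedPool        # hypothetical: abstract field type
    p::Number
end

struct TightlyTypedPool{T<:Number}  # hypothetical: mirrors PATCH 3/6
    p::T
end

isconcretetype(fieldtype(LooselyTypedPool, :p))           # false: `p` could be anything
isconcretetype(fieldtype(TightlyTypedPool{Float64}, :p))  # true:  `p` is a Float64

typeof(TightlyTypedPool(2.0))  # TightlyTypedPool{Float64}, so methods can specialize
```

PATCH 4/6 below then walks this back to a plain `p::Real` field, trading that specialization for a simpler struct with one fewer type parameter.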
From 796321b3cdb09f84a7239fb377ac90ffa2d52193 Mon Sep 17 00:00:00 2001
From: skyleaworlder <870033938@qq.com>
Date: Sat, 11 Feb 2023 06:58:50 +0000
Subject: [PATCH 4/6] update: T => Real

---
 src/layers/conv.jl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index d6407c631b..e1adf350c4 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -634,7 +634,7 @@ function (g::GlobalMeanPool)(x)
 end
 
 """
-    GlobalLPNormPool(p::T)
+    GlobalLPNormPool(p::Real)
 
 Global lp norm pooling layer.
 
@@ -652,8 +652,8 @@ julia> m(xs) |> size
 (1, 1, 7, 50)
 ```
 """
-struct GlobalLPNormPool{T<:Number}
-  p::T
+struct GlobalLPNormPool
+  p::Real
 end
 
 function (g::GlobalLPNormPool)(x)
@@ -786,7 +786,7 @@ function Base.show(io::IO, m::MeanPool)
 end
 
 """
-    LPNormPool(window::NTuple, p::T; pad=0, stride=window)
+    LPNormPool(window::NTuple, p::Real; pad=0, stride=window)
 
 Lp norm pooling layer, calculating the p-norm of each window,
 also known as LPPool in PyTorch.
 
@@ -825,14 +825,14 @@ julia> layer(rand(Float32, 100, 7, 50)) |> size
 (34, 7, 50)
 ```
 """
-struct LPNormPool{N,M,T<:Number}
+struct LPNormPool{N,M}
   k::NTuple{N,Int}
-  p::T
+  p::Real
   pad::NTuple{M,Int}
   stride::NTuple{N,Int}
 end
 
-function LPNormPool(k::NTuple{N,Integer}, p::T; pad = 0, stride = k) where {N,T}
+function LPNormPool(k::NTuple{N,Integer}, p::Real; pad = 0, stride = k) where {N}
   stride = expand(Val(N), stride)
   pad = calc_padding(LPNormPool, pad, k, 1, stride)
   return LPNormPool(k, p, pad, stride)
 end

From e73c3d175a42f18238bb5dfaeeba5d1ac6e6e396 Mon Sep 17 00:00:00 2001
From: skyleaworlder <870033938@qq.com>
Date: Sat, 11 Feb 2023 13:00:45 +0000
Subject: [PATCH 5/6] add: throw DomainError from LPNormPool in Flux when the
 input contains negative numbers, rather than from 'function ^(x, y)' in
 Base.Math

---
 src/layers/conv.jl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index e1adf350c4..d368e21bfc 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -640,6 +640,8 @@ Global lp norm pooling layer.
 
 Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output,
 by performing lp norm pooling on the complete (w,h)-shaped feature maps.
+It also expects the input `x` to satisfy `all(x .>= 0)`, to avoid
+`^(x, y)` in `Base.Math` throwing a `DomainError`.
 
 See also [`LPNormPool`](@ref).
 
@@ -793,6 +795,7 @@ also known as LPPool in PyTorch.
 
 Expects as input an array with `ndims(x) == N+2`, i.e. channel and
 batch dimensions, after the `N` feature dimensions, where `N = length(window)`.
+Also expects `all(x .>= 0)`, to avoid `^(x, y)` in `Base.Math` throwing a `DomainError`.
 
 By default the window size is also the stride in each dimension.
 The keyword `pad` accepts the same options as for the `Conv` layer,
 including `SamePad()`.
 
@@ -839,6 +842,7 @@ function LPNormPool(k::NTuple{N,Integer}, p::Real; pad = 0, stride = k) where {N
 end
 
 function (l::LPNormPool)(x)
+  all(x .>= 0) || throw(DomainError("LPNormPool requires 'all(x .>= 0)'. Relu before LPNormPool is recommended."))
   pdims = PoolDims(x, l.k; padding=l.pad, stride=l.stride)
   return lpnormpool(x, pdims; p=l.p)
 end
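Note: the guard in PATCH 5/6 exists because Julia's `^` throws for a negative base with a non-integer exponent, which is exactly what `lpnormpool` evaluates when `p` is fractional. A sketch of the failure mode and of the `relu` workaround the error message recommends; the layer sizes are illustrative only:

```julia
(-2.0)^2.0    # 4.0: an integer-valued exponent is fine
# (-2.0)^2.5  # throws DomainError: negative base, non-integer exponent

using Flux

# relu keeps activations non-negative, so LPNormPool's input check passes:
m = Chain(Conv((3, 3), 3 => 7, relu), LPNormPool((2, 2), 2.5))
m(rand(Float32, 28, 28, 3, 1)) |> size  # (13, 13, 7, 1)
```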
From b8415dd144426543104a75a70006602abb13a300 Mon Sep 17 00:00:00 2001
From: skyleaworlder <870033938@qq.com>
Date: Mon, 13 Feb 2023 05:38:35 +0000
Subject: [PATCH 6/6] fix: check p & use @ignore_derivatives

---
 src/layers/conv.jl | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index d368e21bfc..40eb80866b 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -640,8 +640,7 @@ Global lp norm pooling layer.
 
 Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output,
 by performing lp norm pooling on the complete (w,h)-shaped feature maps.
-It also expects the input `x` to satisfy `all(x .>= 0)`, to avoid
-`^(x, y)` in `Base.Math` throwing a `DomainError`.
+It also expects the input `x` to satisfy `all(x .>= 0)`, to avoid a `DomainError`.
 
 See also [`LPNormPool`](@ref).
 
@@ -795,7 +794,7 @@ also known as LPPool in PyTorch.
 
 Expects as input an array with `ndims(x) == N+2`, i.e. channel and
 batch dimensions, after the `N` feature dimensions, where `N = length(window)`.
-Also expects `all(x .>= 0)`, to avoid `^(x, y)` in `Base.Math` throwing a `DomainError`.
+Also expects `all(x .>= 0)`, to avoid a `DomainError`.
 
 By default the window size is also the stride in each dimension.
 The keyword `pad` accepts the same options as for the `Conv` layer,
 
@@ -842,7 +841,9 @@ function LPNormPool(k::NTuple{N,Integer}, p::Real; pad = 0, stride = k) where {N
 end
 
 function (l::LPNormPool)(x)
-  all(x .>= 0) || throw(DomainError("LPNormPool requires 'all(x .>= 0)'. Relu before LPNormPool is recommended."))
+  iseven(l.p) || ChainRulesCore.@ignore_derivatives if any(<(0), x)
+    throw(DomainError("LPNormPool requires x to be non-negative"))
+  end
   pdims = PoolDims(x, l.k; padding=l.pad, stride=l.stride)
   return lpnormpool(x, pdims; p=l.p)
 end
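Note: after PATCH 6/6, the guard is skipped for even `p` (where `x^p` is safe on negative input; also note `iseven(::AbstractFloat)` needs Julia 1.7 or later) and, for other `p`, runs outside the AD trace thanks to `@ignore_derivatives`. A rough sketch of the resulting behaviour, assuming a branch with these patches applied and that NNlib's `lpnormpool` gradient is available, which the NNlib >= 0.8.17 bound is expected to supply:

```julia
using Flux

x = randn(Float32, 4, 4, 1, 1)            # mixed signs

LPNormPool((2, 2), 2.0)(x) |> size        # (2, 2, 1, 1): even p skips the check
LPNormPool((2, 2), 1.5)(abs.(x)) |> size  # non-even p requires non-negative input
# LPNormPool((2, 2), 1.5)(x)              # would throw a DomainError for this x

# @ignore_derivatives keeps the check out of the pullback, so only the
# lpnormpool call itself is differentiated:
grads = gradient(x -> sum(LPNormPool((2, 2), 2.0)(x)), x)
```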