Enzyme segfaults on the following MWE #2032
Run mwe.jl in the uploaded folder (enzymemwe.zip), or use the files below.
mwe.jl

using Enzyme
include("NN.jl")
using LinearAlgebra

const layers  = [Dense(4, 2*4*4+10, relu!), Dense(2*4*4+10, 2*4*4, relu!)]
const layers2 = [Dense(4, 2*4*4+10, relu!), Dense(2*4*4+10, 2*4*4, softmax!)]  # this nn fails to AD
const nn  = makeNN(layers)
const nn2 = makeNN(layers2)

##cell
function make_dfunc(nn)
    outp = zeros(32)
    scratch = zeros(8)
    amps = zeros(4)
    function dfunc(du, u, nnparams, t)
        set_nnparams(nn, nnparams)
        applyNN!(nn, amps, outp)
        nothing
    end
    return dfunc, nn
end

u0 = rand(8)
der_u0 = zero(u0)
du0 = zeros(8)
der_du0 = rand(length(du0))

dfunc, _  = make_dfunc(nn)
dfunc2, _ = make_dfunc(nn2)
nnparams  = get_nnparams(nn)
nnparams2 = get_nnparams(nn2)

dfunc(du0, u0, nnparams, 0.1)

println("first nn")
res1 = autodiff(Reverse, Duplicated(dfunc, make_zero(dfunc)),
                Duplicated(du0, der_du0), Duplicated(u0, der_u0),
                Duplicated(nnparams, make_zero(nnparams)), Active(0.1))

println("second nn")  # segfault
res2 = autodiff(Reverse, Duplicated(dfunc2, make_zero(dfunc2)),
                Duplicated(du0, der_du0), Duplicated(u0, der_u0),
                Duplicated(nnparams, make_zero(nnparams)), Active(0.1))
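Since only the network using softmax! fails, a smaller (untested, hypothetical) check along these lines might help narrow whether the activation alone triggers the problem; the array sizes are arbitrary:

# untested narrowing sketch: differentiate softmax! on its own in reverse mode
x    = rand(32)
dx   = zero(x)      # receives the pullback with respect to x
ret  = zeros(32)
dret = ones(32)     # seed on the output
autodiff(Reverse, softmax!, Duplicated(ret, dret), Duplicated(x, dx))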
NN.jl

# fast simple barebones cpu-only allocation-free dense neural networks
# use Enzyme for gradients
using LinearAlgebra  # for mul! in applydense!
import Base.zero
function tanh!(ret::Array, x::Array)
    ret .= tanh.(x)
    nothing
end

function relu!(ret, x)
    ret .= max.(0.0, x)
    nothing
end

function softmax!(ret, x)
    ret .= exp.(x)
    ret .= ret ./ sum(ret)
    nothing
end
struct Dense{T,F<:Function}
    n_inp::Int
    n_nodes::Int
    W::Matrix{T}
    b::Vector{T}
    activation::F
end

"""
dense layer
f :: activation; should take arguments (ret, inp) and store the outputs in ret (see `relu!` for more details)
randfn :: random function called as randfn(a, b), used to initialize the layer's matrix
"""
function Dense(n_inp, n_nodes, f::Function, randfn::Function = rand)
    Dense(n_inp, n_nodes, randfn(n_nodes, n_inp), randfn(n_nodes), f)
end
struct NN{T,L<:Tuple}
    n_inp::Int
    layers::L                        # Tuple of Dense
    intermediates::Vector{Vector{T}} # preallocated vectors for the output of each layer
end

"""
make an NN; consecutive layers must have matching inputs and numbers of nodes
(i.e. n_nodes of the i'th layer == n_inp of the (i+1)'th layer)
#TODO automate this to be nicer.
"""
function makeNN(n_inp, layers::Array, T::Type = Float64)
    @assert length(layers) >= 1
    @assert n_inp == layers[1].n_inp
    # assert consecutive layers match in inputs and nodes
    for i in eachindex(layers)[1:end-1]
        @assert layers[i].n_nodes == layers[i+1].n_inp
    end
    NN(n_inp, Tuple(layers), Vector{T}[zeros(layer.n_nodes) for layer in layers])
end

function makeNN(layers::Array, T::Type = Float64)
    makeNN(layers[1].n_inp, layers, T)
end
"""
get number of parameters in the nn
"""
function paramlength(nn::NN)
r = 0
for l in nn.layers
r = r + length(l.W)
r = r + length(l.b)
end
return r
end
"""
get the parameters of the nn flattened in an array
"""
function get_nnparams(nn::NN)
ret = Float64[]
for l in nn.layers
append!(ret, l.W)
append!(ret, l.b)
end
return ret
end
function set_denseparams(d::Dense, arr)
    d.W .= reshape(view(arr, 1:length(d.W)), size(d.W))
    d.b .= view(arr, length(d.W)+1 : length(d.W)+length(d.b))
end
"""
set a flattened array of params to nn. (possibly not type stable if layers have different activations)
Note, This does not error if params is larger than number of params of the nn.
(we dont assert because this could be be part of a hotloop)
"""
function set_nnparams2(nn, nnparams)
i = 1
for j in 1:length(nn.layers)
ll= nn.layers[j]
set_denseparams(ll, view(nnparams,i:i+length(ll.W)+length(ll.b)-1) )
i=i + length(ll.W) + length(ll.b)
end
nothing
end
"""
set a flattened array of params to nn. (this is type stable)
Note, This does not error if params is larger than number of params of the nn.
"""
@generated function set_nnparams(nn::NN{T, <:NTuple{N, Any}}, nnparams) where {T, N}
quote
i = 1
Base.Cartesian.@nexprs $N j -> begin
#l = nn.layers[j]
#l.W .= reshape(view(nnparams,i:(i+length(l.W)-1)),size(l.W))
#i = i + length(l.W)
#l.b .= view(nnparams,i:(i+length(l.b)-1))
#i = i + length(l.b)
ll= nn.layers[j]
set_denseparams(ll, view(nnparams,i:i+length(ll.W)+length(ll.b)-1) )
i=i + length(ll.W) + length(ll.b)
end
nothing
end
end
"""
returns a similar nn with all 0 params and intermediates
(use make_zero instead if making shadow for autodiff)
"""
function Base.zero(nn::NN)
newnn = deepcopy(nn)
for l in newnn.layers
l.W .= 0.0
l.b .= 0.0
end
for inter in newnn.intermediates
inter .= 0.0
end
return newnn
end
"""
apply dense layer on inp and store the result in out.
inp : a vector of d.inp size.
out : a vector of d.nodes size.
note! uses mul!, `inp` and `out` should not be aliased.
"""
function applydense!(d::Dense, inp, out)
mul!(out, d.W, inp, 1.0, 0.0)
out .+= d.b
d.activation(out,out)
nothing
end
"""
apply neural network nn on vector `inp` and store result in `out`
"""
function applyNN!(nn::NN, inp, out)
applydense!(nn.layers[1], inp, nn.intermediates[1])
for i in eachindex(nn.layers)[2:end]
applydense!(nn.layers[i], nn.intermediates[i-1], nn.intermediates[i])
end
out .= nn.intermediates[end]
nothing
end
|
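For context, a minimal standalone usage sketch of the NN.jl API above; the sizes and values here are made up for illustration and are not part of the original MWE:

# hypothetical standalone usage of NN.jl
include("NN.jl")
layers = [Dense(3, 5, relu!), Dense(5, 2, softmax!)]
nn  = makeNN(layers)        # n_inp taken from the first layer
p   = get_nnparams(nn)      # flatten all W and b into one vector
set_nnparams(nn, p)         # write a flat parameter vector back
inp = rand(3)
out = zeros(2)
applyNN!(nn, inp, out)      # allocation-free forward pass; result stored in `out`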
Project.toml

[deps]
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
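To reproduce with this environment (assuming the unzipped folder contains Project.toml, mwe.jl, and NN.jl), something along these lines should work:

# run from inside the unzipped folder
using Pkg
Pkg.activate(".")      # use the Project.toml above
Pkg.instantiate()      # install Enzyme
include("mwe.jl")      # the second autodiff call is the one that segfaults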
What Julia version did you use?
1.10.5