Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make extract_gradient[_chunk]! GPU compatible #619

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ julia = "1.6"
Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9"
DiffTests = "de460e47-3fe3-5279-bb4a-814414816d5d"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Calculus", "DiffTests", "SparseArrays", "Test", "InteractiveUtils"]
test = ["Calculus", "DiffTests", "SparseArrays", "Test", "InteractiveUtils", "JLArrays"]
8 changes: 4 additions & 4 deletions src/gradient.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,12 @@ function extract_gradient!(::Type{T}, result::DiffResult, dual::Dual) where {T}
end

# Plain `Real` output (not a `Dual`): every entry of `result` is set to `zero(y)`.
extract_gradient!(::Type{T}, result::AbstractArray, y::Real) where {T} = fill!(result, zero(y))
# `Dual` output: copy `partials(T, dual)` into `result` with a single `copyto!` call.
# NOTE(review): this method and the next one have identical signatures; in this diff view
# they look like the pre- and post-change versions of the same definition — confirm which
# one is current.
extract_gradient!(::Type{T}, result::AbstractArray, dual::Dual) where {T}= copyto!(result, partials(T, dual))
# `Dual` output: delegate to `extract_gradient_chunk!`, starting at index 1 with a chunk
# size of `npartials(dual)`.
extract_gradient!(::Type{T}, result::AbstractArray, dual::Dual) where {T} =
extract_gradient_chunk!(T, result, dual, 1, npartials(dual))
Comment on lines +83 to +84
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still somewhat unhappy about this change; intuitively, it feels that it should be easier for Base and packages to optimize copyto!(::AbstractArray, ::Tuple) than the chunk-based version. But if benchmarks don't show any regression, I guess it seems OK - and we don't have to wait for CUDA to add the implementation.


function extract_gradient_chunk!(::Type{T}, result, dual, index, chunksize) where {T}
offset = index - 1
for i in 1:chunksize
result[i + offset] = partials(T, dual, i)
map!(view(Base.ReshapedArray(result, (length(result),), ()), index:index+chunksize-1), 1:chunksize) do i
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was looking around a bit and also browsing old Julia issues - maybe we just have to use Base.ReshapedArray for optimal performance, even though it's unexported.

In any case, I think it would be good to add some explanations for why the function is implemented in this way:

Suggested change
map!(view(Base.ReshapedArray(result, (length(result),), ()), index:index+chunksize-1), 1:chunksize) do i
# `reshape` without allocations: https://github.com/JuliaLang/julia/issues/36313
out = view(Base.ReshapedArray(result, (length(result),), ()), index:(index+chunksize-1))
# use `map!` instead of a for-loop for GPU compatibility: #619
map!(out, 1:chunksize) do i

@inbounds partials(T, dual, i)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really safe? The current implementation does not use @inbounds:

Suggested change
@inbounds partials(T, dual, i)
return partials(T, dual, i)

end
return result
end
Expand Down
22 changes: 22 additions & 0 deletions test/AllocationsTest.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,26 @@ convert_test_574() = convert(ForwardDiff.Dual{Nothing,ForwardDiff.Dual{Nothing,F

end

# Checks that `extract_gradient!` and `extract_gradient_chunk!` report zero allocated bytes
# when writing into a preallocated `Array`, for both vector- and matrix-shaped results.
@testset "Test extract_gradient! allocations" begin
T = Float64
# Vector mode: the dual carries one partial per element of `result`.
@testset "vector-mode size(result)=$size" for size in [(4,), (2,2)]
# Dual with value 0 and `prod(size)` random partials (the random array is splatted into a tuple).
dual = ForwardDiff.Dual(0, (rand(T, size...)...,))
y = Array{T}(undef, size)
# `@allocated` is evaluated twice and only the second measurement is tested: the first
# call may include compilation, so its value is discarded.
alloc = @allocated ForwardDiff.extract_gradient!(Nothing, y, dual)
alloc = @allocated ForwardDiff.extract_gradient!(Nothing, y, dual)
@test alloc == 0
end
# Chunk mode: `result` has more elements than the dual has partials, and the chunk is
# written starting at index 2.
@testset "chunk-mode size(result)=$size" for size in [(DEFAULT_CHUNK_THRESHOLD+1,), (DEFAULT_CHUNK_THRESHOLD+1, DEFAULT_CHUNK_THRESHOLD+1)]
Npartials = DEFAULT_CHUNK_THRESHOLD÷2
dual = ForwardDiff.Dual(0, (rand(T, Npartials...)...,))
y = Array{T}(undef, size)
# Chunk size equal to the number of partials in `dual` (warm-up call, then measured call).
alloc = @allocated ForwardDiff.extract_gradient_chunk!(Nothing, y, dual, 2, Npartials)
alloc = @allocated ForwardDiff.extract_gradient_chunk!(Nothing, y, dual, 2, Npartials)
@test alloc == 0
# Chunk size one smaller than the number of partials in `dual` (warm-up call, then measured call).
alloc = @allocated ForwardDiff.extract_gradient_chunk!(Nothing, y, dual, 2, Npartials-1)
alloc = @allocated ForwardDiff.extract_gradient_chunk!(Nothing, y, dual, 2, Npartials-1)
@test alloc == 0
end
end

end
22 changes: 22 additions & 0 deletions test/GradientTest.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ using ForwardDiff
using ForwardDiff: Dual, Tag
using StaticArrays
using DiffTests
using JLArrays
JLArrays.allowscalar(false)

include(joinpath(dirname(@__FILE__), "utils.jl"))

Expand Down Expand Up @@ -149,6 +151,26 @@ end
@test isequal(ForwardDiff.gradient(t -> t[1]^t[2], [0.0, 1.5]), [0.0, 0.0])
end


##############################################
# test GPUArray compatibility (via JLArrays) #
##############################################

println(" ...testing GPUArray compatibility (via JLArrays)")

# For each input shape (1-, 2- and 3-dimensional, with lengths of 1 and of
# `DEFAULT_CHUNK_THRESHOLD + 1` per dimension), wrap a random host array in a `JLArray`
# and check that the gradient of `prod` comes back with the same array type as the input.
@testset "size = $(size(x))" for x in map(JLArray, [
    rand(1),
    rand(DEFAULT_CHUNK_THRESHOLD + 1),
    rand(1, 1),
    rand(DEFAULT_CHUNK_THRESHOLD + 1, DEFAULT_CHUNK_THRESHOLD + 1),
    rand(1, 1, 1),
])
    @test ForwardDiff.gradient(prod, x) isa typeof(x)
end


#############
# bug fixes #
#############
Expand Down