diff --git a/src/DoubleDouble.jl b/src/DoubleDouble.jl index 5511147..7c2ec5c 100644 --- a/src/DoubleDouble.jl +++ b/src/DoubleDouble.jl @@ -31,19 +31,16 @@ end Double{T<:AbstractFloat}(u::T, v::T) = Double{T}(u, v) Double{T<:AbstractFloat}(x::T) = Double(x, zero(T)) - -const half64 = 1.34217729e8 -const half32 = 4097f0 -const half16 = Float16(33.0) -const halfBig = 3.402823669209384634633746074317682114570000000000000000000000000000000000000000e+38 -# 6.805647338418769269267492148635364229120000000000000000000000000000000000000000e38 - -# round floats to half-precision -# TODO: fix overflow for large values -halfprec(x::Float64) = (p = x*half64; (x-p)+p) # signif(x,26,2) for 26 is 6.7108865e7, this seems like 27 -halfprec(x::Float32) = (p = x*half32; (x-p)+p) # float32(signif(x,12,2)) -halfprec(x::Float16) = (p = x*half16; (x-p)+p) # float16(signif(x,5,2)) -halfprec(x::BigFloat) = (p = x*halfBig; (x-p)+p) # BigFloat(signif(x,128,2)) +# use Veltkamp splitting to remove trailing digits +# see "Handbook of Floating-point Arithmetic", ยง4.4.1 +# TODO: +# - fix overflow for large values +# - use fma when available +function halfprec{T<:AbstractFloat}(x::T) + c = T((1 << cld(precision(T),2)) + 1) + p = x*c + (x-p)+p +end function splitprec(x::AbstractFloat) h = halfprec(x) diff --git a/test/runtests.jl b/test/runtests.jl index 5158aeb..a3cc38a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -64,3 +64,9 @@ a = Double(big"3.1") @test Double{Float32}(3) === Double{Float32}(3.0f0, 0.0f0) @test Single{Float32}(BigFloat(3)) === Single{Float32}(3.0f0) @test Double{Float32}(BigFloat(3)) === Double{Float32}(3.0f0, 0.0f0) + + +# issue #30 +x, y = Single(Float16(0.9775)), Single(Float16(0.5156)) +d = x*y +@test widen(d.hi) + widen(d.lo) == widen(x.hi) * widen(y.hi)