Skip to content
This repository has been archived by the owner on Dec 10, 2023. It is now read-only.

Use correct calculation for Veltkamp splitting #31

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 10 additions & 13 deletions src/DoubleDouble.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,16 @@ end
Double{T<:AbstractFloat}(u::T, v::T) = Double{T}(u, v)
Double{T<:AbstractFloat}(x::T) = Double(x, zero(T))


const half64 = 1.34217729e8
const half32 = 4097f0
const half16 = Float16(33.0)
const halfBig = 3.402823669209384634633746074317682114570000000000000000000000000000000000000000e+38
# 6.805647338418769269267492148635364229120000000000000000000000000000000000000000e38

# round floats to half-precision
# TODO: fix overflow for large values
halfprec(x::Float64) = (p = x*half64; (x-p)+p) # signif(x,26,2) for 26 is 6.7108865e7, this seems like 27
halfprec(x::Float32) = (p = x*half32; (x-p)+p) # float32(signif(x,12,2))
halfprec(x::Float16) = (p = x*half16; (x-p)+p) # float16(signif(x,5,2))
halfprec(x::BigFloat) = (p = x*halfBig; (x-p)+p) # BigFloat(signif(x,128,2))
# use Veltkamp splitting to remove trailing digits
# see "Handbook of Floating-point Arithmetic", §4.4.1
# TODO:
# - fix overflow for large values
# - use fma when available
function halfprec{T<:AbstractFloat}(x::T)
c = T((1 << cld(precision(T),2)) + 1)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, this won't work for BigFloats

p = x*c
(x-p)+p
end

function splitprec(x::AbstractFloat)
h = halfprec(x)
Expand Down
6 changes: 6 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,9 @@ a = Double(big"3.1")
@test Double{Float32}(3) === Double{Float32}(3.0f0, 0.0f0)
@test Single{Float32}(BigFloat(3)) === Single{Float32}(3.0f0)
@test Double{Float32}(BigFloat(3)) === Double{Float32}(3.0f0, 0.0f0)


# issue #30
x, y = Single(Float16(0.9775)), Single(Float16(0.5156))
d = x*y
@test widen(d.hi) + widen(d.lo) == widen(x.hi) * widen(y.hi)