# This file is a part of Julia. Except for the rem_pio2_kernel, and # cody_waite_* methods (see below) license is MIT: https://julialang.org/license # rem_pio2_kernel and cody_waite_* methods are heavily based on FDLIBM code: # __ieee754_rem_pio2, that is made available under the following licence: ## Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. ## ## Developed at SunPro, a Sun Microsystems, Inc. business. ## Permission to use, copy, modify, and distribute this ## software is freely granted, provided that this notice ## is preserved. # Bits of 1/2π # 1/2π == sum(x / 0x1p64^i for i,x = enumerate(INV_2PI)) # Can be obtained by: # # setprecision(BigFloat, 4096) # I = 0.5/big(pi) # for i = 1:19 # I *= 0x1p64 # k = trunc(UInt64, I) # @printf "0x%016x,\n" k # I -= k # end const INV_2PI = ( 0x28be_60db_9391_054a, 0x7f09_d5f4_7d4d_3770, 0x36d8_a566_4f10_e410, 0x7f94_58ea_f7ae_f158, 0x6dc9_1b8e_9093_74b8, 0x0192_4bba_8274_6487, 0x3f87_7ac7_2c4a_69cf, 0xba20_8d7d_4bae_d121, 0x3a67_1c09_ad17_df90, 0x4e64_758e_60d4_ce7d, 0x2721_17e2_ef7e_4a0e, 0xc7fe_25ff_f781_6603, 0xfbcb_c462_d682_9b47, 0xdb4d_9fb3_c9f2_c26d, 0xd3d1_8fd9_a797_fa8b, 0x5d49_eeb1_faf9_7c5e, 0xcf41_ce7d_e294_a4ba, 0x9afe_d7ec_47e3_5742, 0x1580_cc11_bf1e_daea) @inline function cody_waite_2c_pio2(x::Float64, fn, n) pio2_1 = 1.57079632673412561417e+00 pio2_1t = 6.07710050650619224932e-11 z = muladd(-fn, pio2_1, x) # x - fn*pio2_1 y1 = muladd(-fn, pio2_1t, z) # z - fn*pio2_1t y2 = muladd(-fn, pio2_1t, (z - y1)) # (z - y1) - fn*pio2_1t n, DoubleFloat64(y1, y2) end @inline function cody_waite_ext_pio2(x::Float64, xhp) pio2_1 = 1.57079632673412561417e+00 pio2_1t = 6.07710050650619224932e-11 pio2_2 = 6.07710050630396597660e-11 pio2_2t = 2.02226624879595063154e-21 pio2_3 = 2.02226624871116645580e-21 pio2_3t = 8.47842766036889956997e-32 fn = round(x*(2/pi)) # round to integer # on older systems, the above could be faster with # rf = 1.5/eps(Float64) # fn = (x*(2/pi)+rf)-rf r = muladd(-fn, pio2_1, x) # x - fn*pio2_1 w = fn*pio2_1t # 1st round good to 85 bit j = xhp>>20 y1 = r-w high = highword(y1) i = j-((high>>20)&0x7ff) if i>16 # 2nd iteration needed, good to 118 t = r w = fn*pio2_2 r = t-w w = muladd(fn, pio2_2t,-((t-r)-w)) y1 = r-w high = highword(y1) i = j-((high>>20)&0x7ff) if i>49 # 3rd iteration need, 151 bits acc t = r # will cover all possible cases w = fn*pio2_3 r = t-w w = muladd(fn, pio2_3t, -((t-r)-w)) y1 = r-w end end y2 = (r-y1)-w return unsafe_trunc(Int, fn), DoubleFloat64(y1, y2) end """ fromfraction(f::Int128) Compute a tuple of values `(z1,z2)` such that ``z1 + z2 == f / 2^128`` and the significand of `z1` has 27 trailing zeros. """ function fromfraction(f::Int128) if f == 0 return (0.0,0.0) end # 1. get leading term truncated to 26 bits s = ((f < 0) % UInt64) << 63 # sign bit x = abs(f) % UInt128 # magnitude n1 = Base.top_set_bit(x) # ndigits0z(x,2) m1 = ((x >> (n1-26)) % UInt64) << 27 d1 = ((n1-128+1021) % UInt64) << 52 z1 = reinterpret(Float64, s | (d1 + m1)) # 2. compute remaining term x2 = (x - (UInt128(m1) << (n1-53))) if x2 == 0 return (z1, 0.0) end n2 = Base.top_set_bit(x2) m2 = (x2 >> (n2-53)) % UInt64 d2 = ((n2-128+1021) % UInt64) << 52 z2 = reinterpret(Float64, s | (d2 + m2)) return (z1,z2) end # XXX we want to mark :consistent-cy here so that this function can be concrete-folded, # because the effect analysis currently can't prove it in the presence of `@inbounds` or # `:boundscheck`, but still the accesses to `INV_2PI` are really safe here Base.@assume_effects :consistent function paynehanek(x::Float64) # 1. Convert to form # # x = X * 2^k, # # where 2^(n-1) <= X < 2^n is an n-bit integer (n = 53, k = exponent(x)-52 ) # Computations are integer based, so reinterpret x as UInt64 u = reinterpret(UInt64, x) # Strip x of exponent bits and replace with ^1 X = (u & significand_mask(Float64)) | (one(UInt64) << significand_bits(Float64)) # Get k from formula above # k = exponent(x)-52 raw_exponent = ((u & exponent_mask(Float64)) >> significand_bits(Float64)) % Int k = raw_exponent - exponent_bias(Float64) - significand_bits(Float64) # 2. Let α = 1/2π, then: # # α*x mod 1 ≡ [(α*2^k mod 1)*X] mod 1 # # so we can ignore the first k bits of α. Extract the next 3 64-bit parts of α. # # i.e. equivalent to # setprecision(BigFloat,4096) # α = 1/(2*big(pi)) # A = mod(ldexp(α,k), 1) # z1 = ldexp(A,64) # a1 = trunc(UInt64, z1) # z2 = ldexp(z1-a1, 64) # a2 = trunc(UInt64, z2) # z3 = ldexp(z2-a2, 64) # a3 = trunc(UInt64, z3) # This is equivalent to # idx, shift = divrem(k, 64) # but divrem is slower. idx = k >> 6 shift = k - (idx << 6) if shift == 0 @inbounds a1 = INV_2PI[idx+1] @inbounds a2 = INV_2PI[idx+2] @inbounds a3 = INV_2PI[idx+3] else # use shifts to extract the relevant 64 bit window @inbounds a1 = (idx < 0 ? zero(UInt64) : INV_2PI[idx+1] << shift) | (INV_2PI[idx+2] >> (64 - shift)) @inbounds a2 = (INV_2PI[idx+2] << shift) | (INV_2PI[idx+3] >> (64 - shift)) @inbounds a3 = (INV_2PI[idx+3] << shift) | (INV_2PI[idx+4] >> (64 - shift)) end # 3. Perform the multiplication: # # X. 0 0 0 # × 0. a1 a2 a3 # ============== # _. w w _ # # (i.e. ignoring integer and lowest bit parts of result) w1 = UInt128(X*a1) << 64 # overflow becomes integer w2 = widemul(X,a2) w3 = widemul(X,a3) >> 64 w = w1 + w2 + w3 # quotient fraction after division by 2π # adjust for sign of x w = flipsign(w,x) # 4. convert to quadrant, quotient fraction after division by π/2: q = (((w>>125)%Int +1)>>1) # nearest quadrant f = (w<<2) % Int128 # fraction part of quotient after division by π/2, taking values on [-0.5,0.5) # 5. convert quotient fraction to split precision Float64 z_hi,z_lo = fromfraction(f) # 6. multiply by π/2 pio2 = 1.5707963267948966 pio2_hi = 1.5707963407039642 pio2_lo = -1.3909067614167116e-8 y_hi = (z_hi+z_lo)*pio2 y_lo = (((z_hi*pio2_hi - y_hi) + z_hi*pio2_lo) + z_lo*pio2_hi) + z_lo*pio2_lo return q, DoubleFloat64(y_hi, y_lo) end """ rem_pio2_kernel(x::Union{Float32, Float64}) Calculate `x` divided by `π/2` accurately for arbitrarily large `x`. Returns a pair `(k, r)`, where `k` is the quadrant of the result (multiple of π/2) and `r` is the remainder, such that ``k * π/2 = x - r``. The remainder is given as a double-double pair. `k` is positive if `x > 0` and is negative if `x ≤ 0`. """ @inline function rem_pio2_kernel(x::Float64) # accurate to 1e-22 xhp = poshighword(x) # xhp <= highword(5pi/4) implies |x| ~<= 5pi/4, if xhp <= 0x400f6a7a # last five bits of xhp == last five bits of highword(pi/2) or # highword(2pi/2) implies |x| ~= pi/2 or 2pi/2, if (xhp & 0xfffff) == 0x921fb # use precise Cody Waite scheme return cody_waite_ext_pio2(x, xhp) end # use Cody Waite with two constants # xhp <= highword(3pi/4) implies |x| ~<= 3pi/4 if xhp <= 0x4002d97c if x > 0.0 return cody_waite_2c_pio2(x, 1.0, 1) else return cody_waite_2c_pio2(x, -1.0, -1) end # 3pi/4 < |x| <= 5pi/4 else if x > 0.0 return cody_waite_2c_pio2(x, 2.0, 2) else return cody_waite_2c_pio2(x, -2.0, -2) end end end # xhp <= highword(9pi/4) implies |x| ~<= 9pi/4 if xhp <= 0x401c463b # xhp <= highword(7pi/4) implies |x| ~<= 7pi/4 if xhp <= 0x4015fdbc # xhp == highword(3pi/2) implies |x| ~= 3pi/2 if xhp == 0x4012d97c # use precise Cody Waite scheme return cody_waite_ext_pio2(x, xhp) end # use Cody Waite with two constants if x > 0.0 return cody_waite_2c_pio2(x, 3.0, 3) else return cody_waite_2c_pio2(x, -3.0, -3) end # 7pi/4 < |x| =< 9pi/4 else # xhp == highword(4pi/2) implies |x| ~= 4pi/2 if xhp == 0x401921fb # use precise Cody Waite scheme return cody_waite_ext_pio2(x, xhp) end # use Cody Waite with two constants if x > 0.0 return cody_waite_2c_pio2(x, 4.0, 4) else return cody_waite_2c_pio2(x, -4.0, -4) end end end # xhp < highword(2.0^20*pi/2) implies |x| ~< 2^20*pi/2 if xhp < 0x413921fb # use precise Cody Waite scheme return cody_waite_ext_pio2(x, xhp) end # if |x| >= 2^20*pi/2 switch to Payne Hanek return paynehanek(x) end @inline function rem_pio2_kernel(x::Float32) xd = convert(Float64, x) # use Cody Waite reduction with two coefficients if abs(x) < Float32(pi*0x1p27) # x < 2^28 * pi/2 fn = round(xd * (2/pi)) r = fma(fn, -pi/2, xd) y = fma(fn, -6.123233995736766e-17, r) # big(pi)/2 - pi/2 remainder return unsafe_trunc(Int, fn), DoubleFloat32(y) end n, y = @noinline paynehanek(xd) return n, DoubleFloat32(y.hi) end