github.com/primecitizens/pcz/std@v0.2.1/math/trig_reduce.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package math
     6  
     7  import (
     8  	"github.com/primecitizens/pcz/std/core/bits"
     9  )
    10  
    11  // reduceThreshold is the maximum value of x where the reduction using Pi/4
    12  // in 3 float64 parts still gives accurate results. This threshold
    13  // is set by y*C being representable as a float64 without error
    14  // where y is given by y = floor(x * (4 / Pi)) and C is the leading partial
    15  // terms of 4/Pi. Since the leading terms (PI4A and PI4B in sin.go) have 30
    16  // and 32 trailing zero bits, y should have less than 30 significant bits.
    17  //
    18  //	y < 1<<30  -> floor(x*4/Pi) < 1<<30 -> x < (1<<30 - 1) * Pi/4
    19  //
    20  // So, conservatively we can take x < 1<<29.
    21  // Above this threshold Payne-Hanek range reduction must be used.
    22  const reduceThreshold = 1 << 29
    23  
    24  // trigReduce implements Payne-Hanek range reduction by Pi/4
    25  // for x > 0. It returns the integer part mod 8 (j) and
    26  // the fractional part (z) of x / (Pi/4).
    27  // The implementation is based on:
    28  // "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit"
    29  // K. C. Ng et al, March 24, 1992
    30  // The simulated multi-precision calculation of x*B uses 64-bit integer arithmetic.
    31  func trigReduce(x float64) (j uint64, z float64) {
    32  	const PI4 = Pi / 4
    33  	if x < PI4 {
    34  		return 0, x
    35  	}
    36  	// Extract out the integer and exponent such that,
    37  	// x = ix * 2 ** exp.
    38  	ix := Float64bits(x)
    39  	exp := int(ix>>shift&mask) - bias - shift
    40  	ix &^= mask << shift
    41  	ix |= 1 << shift
    42  	// Use the exponent to extract the 3 appropriate uint64 digits from mPi4,
    43  	// B ~ (z0, z1, z2), such that the product leading digit has the exponent -61.
    44  	// Note, exp >= -53 since x >= PI4 and exp < 971 for maximum float64.
    45  	digit, bitshift := uint(exp+61)/64, uint(exp+61)%64
    46  	z0 := (mPi4[digit] << bitshift) | (mPi4[digit+1] >> (64 - bitshift))
    47  	z1 := (mPi4[digit+1] << bitshift) | (mPi4[digit+2] >> (64 - bitshift))
    48  	z2 := (mPi4[digit+2] << bitshift) | (mPi4[digit+3] >> (64 - bitshift))
    49  	// Multiply mantissa by the digits and extract the upper two digits (hi, lo).
    50  	z2hi, _ := bits.Mul64(z2, ix)
    51  	z1hi, z1lo := bits.Mul64(z1, ix)
    52  	z0lo := z0 * ix
    53  	lo, c := bits.Add64(z1lo, z2hi, 0)
    54  	hi, _ := bits.Add64(z0lo, z1hi, c)
    55  	// The top 3 bits are j.
    56  	j = hi >> 61
    57  	// Extract the fraction and find its magnitude.
    58  	hi = hi<<3 | lo>>61
    59  	lz := uint(bits.LeadingZeros64(hi))
    60  	e := uint64(bias - (lz + 1))
    61  	// Clear implicit mantissa bit and shift into place.
    62  	hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1)))
    63  	hi >>= 64 - shift
    64  	// Include the exponent and convert to a float.
    65  	hi |= e << shift
    66  	z = Float64frombits(hi)
    67  	// Map zeros to origin.
    68  	if j&1 == 1 {
    69  		j++
    70  		j &= 7
    71  		z--
    72  	}
    73  	// Multiply the fractional part by pi/4.
    74  	return j, z * PI4
    75  }
    76  
    77  // mPi4 is the binary digits of 4/pi as a uint64 array,
    78  // that is, 4/pi = Sum mPi4[i]*2^(-64*i)
    79  // 19 64-bit digits and the leading one bit give 1217 bits
    80  // of precision to handle the largest possible float64 exponent.
    81  var mPi4 = [...]uint64{
    82  	0x0000000000000001,
    83  	0x45f306dc9c882a53,
    84  	0xf84eafa3ea69bb81,
    85  	0xb6c52b3278872083,
    86  	0xfca2c757bd778ac3,
    87  	0x6e48dc74849ba5c0,
    88  	0x0c925dd413a32439,
    89  	0xfc3bd63962534e7d,
    90  	0xd1046bea5d768909,
    91  	0xd338e04d68befc82,
    92  	0x7323ac7306a673e9,
    93  	0x3908bf177bf25076,
    94  	0x3ff12fffbc0b301f,
    95  	0xde5e2316b414da3e,
    96  	0xda6cfd9e4f96136e,
    97  	0x9e8c7ecd3cbfd45a,
    98  	0xea4f758fd7cbe2f6,
    99  	0x7a0e73ef14a525d4,
   100  	0xd7f6bf623f1aba10,
   101  	0xac06608df8f6d757,
   102  }