github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/runtime/sqrt.go

github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/runtime/sqrt.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Copy of math/sqrt.go, here for use by ARM softfloat.
     6  
     7  package runtime
     8  
     9  import "unsafe"
    10  
    11  // The original C code and the long comment below are
    12  // from FreeBSD's /usr/src/lib/msun/src/e_sqrt.c and
    13  // came with this notice.  The go code is a simplified
    14  // version of the original C.
    15  //
    16  // ====================================================
    17  // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    18  //
    19  // Developed at SunPro, a Sun Microsystems, Inc. business.
    20  // Permission to use, copy, modify, and distribute this
    21  // software is freely granted, provided that this notice
    22  // is preserved.
    23  // ====================================================
    24  //
    25  // __ieee754_sqrt(x)
    26  // Return correctly rounded sqrt.
    27  //           -----------------------------------------
    28  //           | Use the hardware sqrt if you have one |
    29  //           -----------------------------------------
    30  // Method:
    31  //   Bit by bit method using integer arithmetic. (Slow, but portable)
    32  //   1. Normalization
    33  //      Scale x to y in [1,4) with even powers of 2:
    34  //      find an integer k such that  1 <= (y=x*2**(2k)) < 4, then
    35  //              sqrt(x) = 2**k * sqrt(y)
    36  //   2. Bit by bit computation
    37  //      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
    38  //           i                                                   0
    39  //                                     i+1         2
    40  //          s  = 2*q , and      y  =  2   * ( y - q  ).          (1)
    41  //           i      i            i                 i
    42  //
    43  //      To compute q    from q , one checks whether
    44  //                  i+1       i
    45  //
    46  //                            -(i+1) 2
    47  //                      (q + 2      )  <= y.                     (2)
    48  //                        i
    49  //                                                            -(i+1)
    50  //      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
    51  //                             i+1   i             i+1   i
    52  //
    53  //      With some algebraic manipulation, it is not difficult to see
    54  //      that (2) is equivalent to
    55  //                             -(i+1)
    56  //                      s  +  2       <= y                       (3)
    57  //                       i                i
    58  //
    59  //      The advantage of (3) is that s  and y  can be computed by
    60  //                                    i      i
    61  //      the following recurrence formula:
    62  //          if (3) is false
    63  //
    64  //          s     =  s  ,       y    = y   ;                     (4)
    65  //           i+1      i          i+1    i
    66  //
    67  //      otherwise,
    68  //                         -i                      -(i+1)
    69  //          s     =  s  + 2  ,  y    = y  -  s  - 2              (5)
    70  //           i+1      i          i+1    i     i
    71  //
    72  //      One may easily use induction to prove (4) and (5).
    73  //      Note. Since the left hand side of (3) contain only i+2 bits,
    74  //            it does not necessary to do a full (53-bit) comparison
    75  //            in (3).
    76  //   3. Final rounding
    77  //      After generating the 53 bits result, we compute one more bit.
    78  //      Together with the remainder, we can decide whether the
    79  //      result is exact, bigger than 1/2ulp, or less than 1/2ulp
    80  //      (it will never equal to 1/2ulp).
    81  //      The rounding mode can be detected by checking whether
    82  //      huge + tiny is equal to huge, and whether huge - tiny is
    83  //      equal to huge for some floating point number "huge" and "tiny".
    84  //
    85  //
    86  // Notes:  Rounding mode detection omitted.
    87  
    88  const (
    89  	float64Mask  = 0x7FF
    90  	float64Shift = 64 - 11 - 1
    91  	float64Bias  = 1023
    92  	maxFloat64   = 1.797693134862315708145274237317043567981e+308 // 2**1023 * (2**53 - 1) / 2**52
    93  )
    94  
    95  func float64bits(f float64) uint64     { return *(*uint64)(unsafe.Pointer(&f)) }
    96  func float64frombits(b uint64) float64 { return *(*float64)(unsafe.Pointer(&b)) }
    97  
    98  func sqrt(x float64) float64 {
    99  	// special cases
   100  	switch {
   101  	case x == 0 || x != x || x > maxFloat64:
   102  		return x
   103  	case x < 0:
   104  		return nan()
   105  	}
   106  	ix := float64bits(x)
   107  	// normalize x
   108  	exp := int((ix >> float64Shift) & float64Mask)
   109  	if exp == 0 { // subnormal x
   110  		for ix&1<<float64Shift == 0 {
   111  			ix <<= 1
   112  			exp--
   113  		}
   114  		exp++
   115  	}
   116  	exp -= float64Bias // unbias exponent
   117  	ix &^= float64Mask << float64Shift
   118  	ix |= 1 << float64Shift
   119  	if exp&1 == 1 { // odd exp, double x to make it even
   120  		ix <<= 1
   121  	}
   122  	exp >>= 1 // exp = exp/2, exponent of square root
   123  	// generate sqrt(x) bit by bit
   124  	ix <<= 1
   125  	var q, s uint64                      // q = sqrt(x)
   126  	r := uint64(1 << (float64Shift + 1)) // r = moving bit from MSB to LSB
   127  	for r != 0 {
   128  		t := s + r
   129  		if t <= ix {
   130  			s = t + r
   131  			ix -= t
   132  			q += r
   133  		}
   134  		ix <<= 1
   135  		r >>= 1
   136  	}
   137  	// final rounding
   138  	if ix != 0 { // remainder, result not exact
   139  		q += q & 1 // round according to extra bit
   140  	}
   141  	ix = q>>1 + uint64(exp-1+float64Bias)<<float64Shift // significand + biased exponent
   142  	return float64frombits(ix)
   143  }