github.com/sanprasirt/go@v0.0.0-20170607001320-a027466e4b6d/src/math/sqrt.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package math
     6  
     7  // The original C code and the long comment below are
     8  // from FreeBSD's /usr/src/lib/msun/src/e_sqrt.c and
     9  // came with this notice. The go code is a simplified
    10  // version of the original C.
    11  //
    12  // ====================================================
    13  // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    14  //
    15  // Developed at SunPro, a Sun Microsystems, Inc. business.
    16  // Permission to use, copy, modify, and distribute this
    17  // software is freely granted, provided that this notice
    18  // is preserved.
    19  // ====================================================
    20  //
    21  // __ieee754_sqrt(x)
    22  // Return correctly rounded sqrt.
    23  //           -----------------------------------------
    24  //           | Use the hardware sqrt if you have one |
    25  //           -----------------------------------------
    26  // Method:
    27  //   Bit by bit method using integer arithmetic. (Slow, but portable)
    28  //   1. Normalization
    29  //      Scale x to y in [1,4) with even powers of 2:
    30  //      find an integer k such that  1 <= (y=x*2**(2k)) < 4, then
    31  //              sqrt(x) = 2**k * sqrt(y)
    32  //   2. Bit by bit computation
    33  //      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
    34  //           i                                                   0
    35  //                                     i+1         2
    36  //          s  = 2*q , and      y  =  2   * ( y - q  ).          (1)
    37  //           i      i            i                 i
    38  //
    39  //      To compute q    from q , one checks whether
    40  //                  i+1       i
    41  //
    42  //                            -(i+1) 2
    43  //                      (q + 2      )  <= y.                     (2)
    44  //                        i
    45  //                                                            -(i+1)
    46  //      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
    47  //                             i+1   i             i+1   i
    48  //
    49  //      With some algebraic manipulation, it is not difficult to see
    50  //      that (2) is equivalent to
    51  //                             -(i+1)
    52  //                      s  +  2       <= y                       (3)
    53  //                       i                i
    54  //
    55  //      The advantage of (3) is that s  and y  can be computed by
    56  //                                    i      i
    57  //      the following recurrence formula:
    58  //          if (3) is false
    59  //
    60  //          s     =  s  ,       y    = y   ;                     (4)
    61  //           i+1      i          i+1    i
    62  //
    63  //      otherwise,
    64  //                         -i                      -(i+1)
    65  //          s     =  s  + 2  ,  y    = y  -  s  - 2              (5)
    66  //           i+1      i          i+1    i     i
    67  //
    68  //      One may easily use induction to prove (4) and (5).
    69  //      Note. Since the left hand side of (3) contain only i+2 bits,
    70  //            it does not necessary to do a full (53-bit) comparison
    71  //            in (3).
    72  //   3. Final rounding
    73  //      After generating the 53 bits result, we compute one more bit.
    74  //      Together with the remainder, we can decide whether the
    75  //      result is exact, bigger than 1/2ulp, or less than 1/2ulp
    76  //      (it will never equal to 1/2ulp).
    77  //      The rounding mode can be detected by checking whether
    78  //      huge + tiny is equal to huge, and whether huge - tiny is
    79  //      equal to huge for some floating point number "huge" and "tiny".
    80  //
    81  //
    82  // Notes:  Rounding mode detection omitted. The constants "mask", "shift",
    83  // and "bias" are found in src/math/bits.go
    84  
    85  // Sqrt returns the square root of x.
    86  //
    87  // Special cases are:
    88  //	Sqrt(+Inf) = +Inf
    89  //	Sqrt(±0) = ±0
    90  //	Sqrt(x < 0) = NaN
    91  //	Sqrt(NaN) = NaN
    92  func Sqrt(x float64) float64
    93  
    94  // Note: Sqrt is implemented in assembly on some systems.
    95  // Others have assembly stubs that jump to func sqrt below.
    96  // On systems where Sqrt is a single instruction, the compiler
    97  // may turn a direct call into a direct use of that instruction instead.
    98  
    99  func sqrt(x float64) float64 {
   100  	// special cases
   101  	switch {
   102  	case x == 0 || IsNaN(x) || IsInf(x, 1):
   103  		return x
   104  	case x < 0:
   105  		return NaN()
   106  	}
   107  	ix := Float64bits(x)
   108  	// normalize x
   109  	exp := int((ix >> shift) & mask)
   110  	if exp == 0 { // subnormal x
   111  		for ix&(1<<shift) == 0 {
   112  			ix <<= 1
   113  			exp--
   114  		}
   115  		exp++
   116  	}
   117  	exp -= bias // unbias exponent
   118  	ix &^= mask << shift
   119  	ix |= 1 << shift
   120  	if exp&1 == 1 { // odd exp, double x to make it even
   121  		ix <<= 1
   122  	}
   123  	exp >>= 1 // exp = exp/2, exponent of square root
   124  	// generate sqrt(x) bit by bit
   125  	ix <<= 1
   126  	var q, s uint64               // q = sqrt(x)
   127  	r := uint64(1 << (shift + 1)) // r = moving bit from MSB to LSB
   128  	for r != 0 {
   129  		t := s + r
   130  		if t <= ix {
   131  			s = t + r
   132  			ix -= t
   133  			q += r
   134  		}
   135  		ix <<= 1
   136  		r >>= 1
   137  	}
   138  	// final rounding
   139  	if ix != 0 { // remainder, result not exact
   140  		q += q & 1 // round according to extra bit
   141  	}
   142  	ix = q>>1 + uint64(exp-1+bias)<<shift // significand + biased exponent
   143  	return Float64frombits(ix)
   144  }