gitee.com/quant1x/num@v0.3.2/math32/expm1f.go

gitee.com/quant1x/num@v0.3.2/math32/expm1f.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package math32
     6  
     7  // The original C code, the long comment, and the constants
     8  // below are from FreeBSD's /usr/src/lib/msun/src/s_expm1.c
     9  // and came with this notice. The go code is a simplified
    10  // version of the original C.
    11  //
    12  // ====================================================
    13  // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    14  //
    15  // Developed at SunPro, a Sun Microsystems, Inc. business.
    16  // Permission to use, copy, modify, and distribute this
    17  // software is freely granted, provided that this notice
    18  // is preserved.
    19  // ====================================================
    20  //
    21  // expm1(x)
    22  // Returns exp(x)-1, the exponential of x minus 1.
    23  //
    24  // Method
    25  //   1. Argument reduction:
    26  //      Given x, find r and integer k such that
    27  //
    28  //               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
    29  //
    30  //      Here a correction term c will be computed to compensate
    31  //      the error in r when rounded to a floating-point number.
    32  //
    33  //   2. Approximating expm1(r) by a special rational function on
    34  //      the interval [0,0.34658]:
    35  //      Since
    36  //          r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 - r**4/360 + ...
    37  //      we define R1(r*r) by
    38  //          r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 * R1(r*r)
    39  //      That is,
    40  //          R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
    41  //                   = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
    42  //                   = 1 - r**2/60 + r**4/2520 - r**6/100800 + ...
    43  //      We use a special Reme algorithm on [0,0.347] to generate
    44  //      a polynomial of degree 5 in r*r to approximate R1. The
    45  //      maximum error of this polynomial approximation is bounded
    46  //      by 2**-61. In other words,
    47  //          R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
    48  //      where   Q1  =  -1.6666666666666567384E-2,
    49  //              Q2  =   3.9682539681370365873E-4,
    50  //              Q3  =  -9.9206344733435987357E-6,
    51  //              Q4  =   2.5051361420808517002E-7,
    52  //              Q5  =  -6.2843505682382617102E-9;
    53  //      (where z=r*r, and the values of Q1 to Q5 are listed below)
    54  //      with error bounded by
    55  //          |                  5           |     -61
    56  //          | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
    57  //          |                              |
    58  //
    59  //      expm1(r) = exp(r)-1 is then computed by the following
    60  //      specific way which minimize the accumulation rounding error:
    61  //                             2     3
    62  //                            r     r    [ 3 - (R1 + R1*r/2)  ]
    63  //            expm1(r) = r + --- + --- * [--------------------]
    64  //                            2     2    [ 6 - r*(3 - R1*r/2) ]
    65  //
    66  //      To compensate the error in the argument reduction, we use
    67  //              expm1(r+c) = expm1(r) + c + expm1(r)*c
    68  //                         ~ expm1(r) + c + r*c
    69  //      Thus c+r*c will be added in as the correction terms for
    70  //      expm1(r+c). Now rearrange the term to avoid optimization
    71  //      screw up:
    72  //                      (      2                                    2 )
    73  //                      ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
    74  //       expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
    75  //                      ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
    76  //                      (                                             )
    77  //
    78  //                 = r - E
    79  //   3. Scale back to obtain expm1(x):
    80  //      From step 1, we have
    81  //         expm1(x) = either 2**k*[expm1(r)+1] - 1
    82  //                  = or     2**k*[expm1(r) + (1-2**-k)]
    83  //   4. Implementation notes:
    84  //      (A). To save one multiplication, we scale the coefficient Qi
    85  //           to Qi*2**i, and replace z by (x**2)/2.
    86  //      (B). To achieve maximum accuracy, we compute expm1(x) by
    87  //        (i)   if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
    88  //        (ii)  if k=0, return r-E
    89  //        (iii) if k=-1, return 0.5*(r-E)-0.5
    90  //        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
    91  //                     else          return  1.0+2.0*(r-E);
    92  //        (v)   if (k<-2||k>56) return 2**k(1-(E-r)) - 1 (or exp(x)-1)
    93  //        (vi)  if k <= 20, return 2**k((1-2**-k)-(E-r)), else
    94  //        (vii) return 2**k(1-((E+2**-k)-r))
    95  //
    96  // Special cases:
    97  //      expm1(INF) is INF, expm1(NaN) is NaN;
    98  //      expm1(-INF) is -1, and
    99  //      for finite argument, only expm1(0)=0 is exact.
   100  //
   101  // Accuracy:
   102  //      according to an error analysis, the error is always less than
   103  //      1 ulp (unit in the last place).
   104  //
   105  // Misc. info.
   106  //      For IEEE double
   107  //          if x >  7.09782712893383973096e+02 then expm1(x) overflow
   108  //
   109  // Constants:
   110  // The hexadecimal values are the intended ones for the following
   111  // constants. The decimal values may be used, provided that the
   112  // compiler will convert from decimal to binary accurately enough
   113  // to produce the hexadecimal values shown.
   114  //
   115  
   116  // Expm1 returns e**x - 1, the base-e exponential of x minus 1.
   117  // It is more accurate than Exp(x) - 1 when x is near zero.
   118  //
   119  // Special cases are:
   120  //
   121  //	Expm1(+Inf) = +Inf
   122  //	Expm1(-Inf) = -1
   123  //	Expm1(NaN) = NaN
   124  //
   125  // Very large values overflow to -1 or +Inf.
   126  func Expm1(x float32) float32 {
   127  	return expm1(x)
   128  }
   129  
   130  func expm1(x float32) float32 {
   131  	const (
   132  		Othreshold = 89.415985                   // 0x42b2d4fc
   133  		Ln2X27     = 1.871497344970703125e+01    // 0x4195b844
   134  		Ln2HalfX3  = 1.0397207736968994140625    // 0x3F851592
   135  		Ln2Half    = 3.465735912322998046875e-01 // 0x3eb17218
   136  		Ln2Hi      = 6.9313812256e-01            // 0x3f317180
   137  		Ln2Lo      = 9.0580006145e-06            // 0x3717f7d1
   138  		InvLn2     = 1.4426950216e+00            // 0x3fb8aa3b
   139  		Tiny       = 1.0 / (1 << 54)             // 2**-54 = 0x3c90000000000000
   140  
   141  		/* scaled coefficients related to expm1 */
   142  		Q1 = -3.3333335072e-02 /* 0xbd088889 */
   143  		Q2 = 1.5873016091e-03  /* 0x3ad00d01 */
   144  		Q3 = -7.9365076090e-05 /* 0xb8a670cd */
   145  		Q4 = 4.0082177293e-06  /* 0x36867e54 */
   146  		Q5 = -2.0109921195e-07 /* 0xb457edbb */
   147  	)
   148  
   149  	// special cases
   150  	switch {
   151  	case IsInf(x, 1) || IsNaN(x):
   152  		return x
   153  	case IsInf(x, -1):
   154  		return -1
   155  	}
   156  
   157  	absx := x
   158  	sign := false
   159  	if x < 0 {
   160  		absx = -absx
   161  		sign = true
   162  	}
   163  
   164  	// filter out huge argument
   165  	if absx >= Ln2X27 { // if |x| >= 27 * ln2
   166  		if sign {
   167  			return -1 // x < -56*ln2, return -1
   168  		}
   169  		if absx >= Othreshold { // if |x| >= 89.415985...
   170  			return Inf(1)
   171  		}
   172  	}
   173  
   174  	// argument reduction
   175  	var c float32
   176  	var k int
   177  	if absx > Ln2Half { // if  |x| > 0.5 * ln2
   178  		var hi, lo float32
   179  		if absx < Ln2HalfX3 { // and |x| < 1.5 * ln2
   180  			if !sign {
   181  				hi = x - Ln2Hi
   182  				lo = Ln2Lo
   183  				k = 1
   184  			} else {
   185  				hi = x + Ln2Hi
   186  				lo = -Ln2Lo
   187  				k = -1
   188  			}
   189  		} else {
   190  			if !sign {
   191  				k = int(InvLn2*x + 0.5)
   192  			} else {
   193  				k = int(InvLn2*x - 0.5)
   194  			}
   195  			t := float32(k)
   196  			hi = x - t*Ln2Hi // t * Ln2Hi is exact here
   197  			lo = t * Ln2Lo
   198  		}
   199  		x = hi - lo
   200  		c = (hi - x) - lo
   201  	} else if absx < Tiny { // when |x| < 2**-54, return x
   202  		return x
   203  	} else {
   204  		k = 0
   205  	}
   206  
   207  	// x is now in primary range
   208  	hfx := 0.5 * x
   209  	hxs := x * hfx
   210  	r1 := 1 + hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))))
   211  	t := 3 - r1*hfx
   212  	e := hxs * ((r1 - t) / (6.0 - x*t))
   213  	if k != 0 {
   214  		e = (x*(e-c) - c)
   215  		e -= hxs
   216  		switch {
   217  		case k == -1:
   218  			return 0.5*(x-e) - 0.5
   219  		case k == 1:
   220  			if x < -0.25 {
   221  				return -2 * (e - (x + 0.5))
   222  			}
   223  			return 1 + 2*(x-e)
   224  		case k <= -2 || k > 56: // suffice to return exp(x)-1
   225  			y := 1 - (e - x)
   226  			y = Float32frombits(Float32bits(y) + uint32(k)<<23) // add k to y's exponent
   227  			return y - 1
   228  		}
   229  		if k < 20 {
   230  			t := Float32frombits(0x3f800000 - (0x1000000 >> uint(k))) // t=1-2**-k
   231  			y := t - (e - x)
   232  			y = Float32frombits(Float32bits(y) + uint32(k)<<23) // add k to y's exponent
   233  			return y
   234  		}
   235  		t := Float32frombits(uint32(0x7f-k) << 23) // 2**-k
   236  		y := x - (e + t)
   237  		y += 1
   238  		y = Float32frombits(Float32bits(y) + uint32(k)<<23) // add k to y's exponent
   239  		return y
   240  	}
   241  	return x - (x*e - hxs) // c is 0
   242  }