github.com/afumu/libc@v0.0.6/musl/src/math/powl.c

github.com/afumu/libc@v0.0.6/musl/src/math/powl.c (about)

     1  /* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */
     2  /*
     3   * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
     4   *
     5   * Permission to use, copy, modify, and distribute this software for any
     6   * purpose with or without fee is hereby granted, provided that the above
     7   * copyright notice and this permission notice appear in all copies.
     8   *
     9   * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    10   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    11   * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    12   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    13   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    14   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    15   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    16   */
    17  /*                                                      powl.c
    18   *
    19   *      Power function, long double precision
    20   *
    21   *
    22   * SYNOPSIS:
    23   *
    24   * long double x, y, z, powl();
    25   *
    26   * z = powl( x, y );
    27   *
    28   *
    29   * DESCRIPTION:
    30   *
    31   * Computes x raised to the yth power.  Analytically,
    32   *
    33   *      x**y  =  exp( y log(x) ).
    34   *
    35   * Following Cody and Waite, this program uses a lookup table
    36   * of 2**-i/32 and pseudo extended precision arithmetic to
    37   * obtain several extra bits of accuracy in both the logarithm
    38   * and the exponential.
    39   *
    40   *
    41   * ACCURACY:
    42   *
    43   * The relative error of pow(x,y) can be estimated
    44   * by   y dl ln(2),   where dl is the absolute error of
    45   * the internally computed base 2 logarithm.  At the ends
    46   * of the approximation interval the logarithm equal 1/32
    47   * and its relative error is about 1 lsb = 1.1e-19.  Hence
    48   * the predicted relative error in the result is 2.3e-21 y .
    49   *
    50   *                      Relative error:
    51   * arithmetic   domain     # trials      peak         rms
    52   *
    53   *    IEEE     +-1000       40000      2.8e-18      3.7e-19
    54   * .001 < x < 1000, with log(x) uniformly distributed.
    55   * -1000 < y < 1000, y uniformly distributed.
    56   *
    57   *    IEEE     0,8700       60000      6.5e-18      1.0e-18
    58   * 0.99 < x < 1.01, 0 < y < 8700, uniformly distributed.
    59   *
    60   *
    61   * ERROR MESSAGES:
    62   *
    63   *   message         condition      value returned
    64   * pow overflow     x**y > MAXNUM      INFINITY
    65   * pow underflow   x**y < 1/MAXNUM       0.0
    66   * pow domain      x<0 and y noninteger  0.0
    67   *
    68   */
    69  
    70  #include "libm.h"
    71  
    72  #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
    73  long double powl(long double x, long double y)
    74  {
    75  	return pow(x, y);
    76  }
    77  #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
    78  
    79  /* Table size */
    80  #define NXT 32
    81  
    82  /* log(1+x) =  x - .5x^2 + x^3 *  P(z)/Q(z)
    83   * on the domain  2^(-1/32) - 1  <=  x  <=  2^(1/32) - 1
    84   */
    85  static const long double P[] = {
    86   8.3319510773868690346226E-4L,
    87   4.9000050881978028599627E-1L,
    88   1.7500123722550302671919E0L,
    89   1.4000100839971580279335E0L,
    90  };
    91  static const long double Q[] = {
    92  /* 1.0000000000000000000000E0L,*/
    93   5.2500282295834889175431E0L,
    94   8.4000598057587009834666E0L,
    95   4.2000302519914740834728E0L,
    96  };
    97  /* A[i] = 2^(-i/32), rounded to IEEE long double precision.
    98   * If i is even, A[i] + B[i/2] gives additional accuracy.
    99   */
   100  static const long double A[33] = {
   101   1.0000000000000000000000E0L,
   102   9.7857206208770013448287E-1L,
   103   9.5760328069857364691013E-1L,
   104   9.3708381705514995065011E-1L,
   105   9.1700404320467123175367E-1L,
   106   8.9735453750155359320742E-1L,
   107   8.7812608018664974155474E-1L,
   108   8.5930964906123895780165E-1L,
   109   8.4089641525371454301892E-1L,
   110   8.2287773907698242225554E-1L,
   111   8.0524516597462715409607E-1L,
   112   7.8799042255394324325455E-1L,
   113   7.7110541270397041179298E-1L,
   114   7.5458221379671136985669E-1L,
   115   7.3841307296974965571198E-1L,
   116   7.2259040348852331001267E-1L,
   117   7.0710678118654752438189E-1L,
   118   6.9195494098191597746178E-1L,
   119   6.7712777346844636413344E-1L,
   120   6.6261832157987064729696E-1L,
   121   6.4841977732550483296079E-1L,
   122   6.3452547859586661129850E-1L,
   123   6.2092890603674202431705E-1L,
   124   6.0762367999023443907803E-1L,
   125   5.9460355750136053334378E-1L,
   126   5.8186242938878875689693E-1L,
   127   5.6939431737834582684856E-1L,
   128   5.5719337129794626814472E-1L,
   129   5.4525386633262882960438E-1L,
   130   5.3357020033841180906486E-1L,
   131   5.2213689121370692017331E-1L,
   132   5.1094857432705833910408E-1L,
   133   5.0000000000000000000000E-1L,
   134  };
   135  static const long double B[17] = {
   136   0.0000000000000000000000E0L,
   137   2.6176170809902549338711E-20L,
   138  -1.0126791927256478897086E-20L,
   139   1.3438228172316276937655E-21L,
   140   1.2207982955417546912101E-20L,
   141  -6.3084814358060867200133E-21L,
   142   1.3164426894366316434230E-20L,
   143  -1.8527916071632873716786E-20L,
   144   1.8950325588932570796551E-20L,
   145   1.5564775779538780478155E-20L,
   146   6.0859793637556860974380E-21L,
   147  -2.0208749253662532228949E-20L,
   148   1.4966292219224761844552E-20L,
   149   3.3540909728056476875639E-21L,
   150  -8.6987564101742849540743E-22L,
   151  -1.2327176863327626135542E-20L,
   152   0.0000000000000000000000E0L,
   153  };
   154  
   155  /* 2^x = 1 + x P(x),
   156   * on the interval -1/32 <= x <= 0
   157   */
   158  static const long double R[] = {
   159   1.5089970579127659901157E-5L,
   160   1.5402715328927013076125E-4L,
   161   1.3333556028915671091390E-3L,
   162   9.6181291046036762031786E-3L,
   163   5.5504108664798463044015E-2L,
   164   2.4022650695910062854352E-1L,
   165   6.9314718055994530931447E-1L,
   166  };
   167  
   168  #define MEXP (NXT*16384.0L)
   169  /* The following if denormal numbers are supported, else -MEXP: */
   170  #define MNEXP (-NXT*(16384.0L+64.0L))
   171  /* log2(e) - 1 */
   172  #define LOG2EA 0.44269504088896340735992L
   173  
   174  #define F W
   175  #define Fa Wa
   176  #define Fb Wb
   177  #define G W
   178  #define Ga Wa
   179  #define Gb u
   180  #define H W
   181  #define Ha Wb
   182  #define Hb Wb
   183  
   184  static const long double MAXLOGL = 1.1356523406294143949492E4L;
   185  static const long double MINLOGL = -1.13994985314888605586758E4L;
   186  static const long double LOGE2L = 6.9314718055994530941723E-1L;
   187  static const long double huge = 0x1p10000L;
   188  /* XXX Prevent gcc from erroneously constant folding this. */
   189  static const volatile long double twom10000 = 0x1p-10000L;
   190  
   191  static long double reducl(long double);
   192  static long double powil(long double, int);
   193  
   194  long double powl(long double x, long double y)
   195  {
   196  	/* double F, Fa, Fb, G, Ga, Gb, H, Ha, Hb */
   197  	int i, nflg, iyflg, yoddint;
   198  	long e;
   199  	volatile long double z=0;
   200  	long double w=0, W=0, Wa=0, Wb=0, ya=0, yb=0, u=0;
   201  
   202  	/* make sure no invalid exception is raised by nan comparision */
   203  	if (isnan(x)) {
   204  		if (!isnan(y) && y == 0.0)
   205  			return 1.0;
   206  		return x;
   207  	}
   208  	if (isnan(y)) {
   209  		if (x == 1.0)
   210  			return 1.0;
   211  		return y;
   212  	}
   213  	if (x == 1.0)
   214  		return 1.0; /* 1**y = 1, even if y is nan */
   215  	if (x == -1.0 && !isfinite(y))
   216  		return 1.0; /* -1**inf = 1 */
   217  	if (y == 0.0)
   218  		return 1.0; /* x**0 = 1, even if x is nan */
   219  	if (y == 1.0)
   220  		return x;
   221  	if (y >= LDBL_MAX) {
   222  		if (x > 1.0 || x < -1.0)
   223  			return INFINITY;
   224  		if (x != 0.0)
   225  			return 0.0;
   226  	}
   227  	if (y <= -LDBL_MAX) {
   228  		if (x > 1.0 || x < -1.0)
   229  			return 0.0;
   230  		if (x != 0.0 || y == -INFINITY)
   231  			return INFINITY;
   232  	}
   233  	if (x >= LDBL_MAX) {
   234  		if (y > 0.0)
   235  			return INFINITY;
   236  		return 0.0;
   237  	}
   238  
   239  	w = floorl(y);
   240  
   241  	/* Set iyflg to 1 if y is an integer. */
   242  	iyflg = 0;
   243  	if (w == y)
   244  		iyflg = 1;
   245  
   246  	/* Test for odd integer y. */
   247  	yoddint = 0;
   248  	if (iyflg) {
   249  		ya = fabsl(y);
   250  		ya = floorl(0.5 * ya);
   251  		yb = 0.5 * fabsl(w);
   252  		if( ya != yb )
   253  			yoddint = 1;
   254  	}
   255  
   256  	if (x <= -LDBL_MAX) {
   257  		if (y > 0.0) {
   258  			if (yoddint)
   259  				return -INFINITY;
   260  			return INFINITY;
   261  		}
   262  		if (y < 0.0) {
   263  			if (yoddint)
   264  				return -0.0;
   265  			return 0.0;
   266  		}
   267  	}
   268  	nflg = 0; /* (x<0)**(odd int) */
   269  	if (x <= 0.0) {
   270  		if (x == 0.0) {
   271  			if (y < 0.0) {
   272  				if (signbit(x) && yoddint)
   273  					/* (-0.0)**(-odd int) = -inf, divbyzero */
   274  					return -1.0/0.0;
   275  				/* (+-0.0)**(negative) = inf, divbyzero */
   276  				return 1.0/0.0;
   277  			}
   278  			if (signbit(x) && yoddint)
   279  				return -0.0;
   280  			return 0.0;
   281  		}
   282  		if (iyflg == 0)
   283  			return (x - x) / (x - x); /* (x<0)**(non-int) is NaN */
   284  		/* (x<0)**(integer) */
   285  		if (yoddint)
   286  			nflg = 1; /* negate result */
   287  		x = -x;
   288  	}
   289  	/* (+integer)**(integer)  */
   290  	if (iyflg && floorl(x) == x && fabsl(y) < 32768.0) {
   291  		w = powil(x, (int)y);
   292  		return nflg ? -w : w;
   293  	}
   294  
   295  	/* separate significand from exponent */
   296  	x = frexpl(x, &i);
   297  	e = i;
   298  
   299  	/* find significand in antilog table A[] */
   300  	i = 1;
   301  	if (x <= A[17])
   302  		i = 17;
   303  	if (x <= A[i+8])
   304  		i += 8;
   305  	if (x <= A[i+4])
   306  		i += 4;
   307  	if (x <= A[i+2])
   308  		i += 2;
   309  	if (x >= A[1])
   310  		i = -1;
   311  	i += 1;
   312  
   313  	/* Find (x - A[i])/A[i]
   314  	 * in order to compute log(x/A[i]):
   315  	 *
   316  	 * log(x) = log( a x/a ) = log(a) + log(x/a)
   317  	 *
   318  	 * log(x/a) = log(1+v),  v = x/a - 1 = (x-a)/a
   319  	 */
   320  	x -= A[i];
   321  	x -= B[i/2];
   322  	x /= A[i];
   323  
   324  	/* rational approximation for log(1+v):
   325  	 *
   326  	 * log(1+v)  =  v  -  v**2/2  +  v**3 P(v) / Q(v)
   327  	 */
   328  	z = x*x;
   329  	w = x * (z * __polevll(x, P, 3) / __p1evll(x, Q, 3));
   330  	w = w - 0.5*z;
   331  
   332  	/* Convert to base 2 logarithm:
   333  	 * multiply by log2(e) = 1 + LOG2EA
   334  	 */
   335  	z = LOG2EA * w;
   336  	z += w;
   337  	z += LOG2EA * x;
   338  	z += x;
   339  
   340  	/* Compute exponent term of the base 2 logarithm. */
   341  	w = -i;
   342  	w /= NXT;
   343  	w += e;
   344  	/* Now base 2 log of x is w + z. */
   345  
   346  	/* Multiply base 2 log by y, in extended precision. */
   347  
   348  	/* separate y into large part ya
   349  	 * and small part yb less than 1/NXT
   350  	 */
   351  	ya = reducl(y);
   352  	yb = y - ya;
   353  
   354  	/* (w+z)(ya+yb)
   355  	 * = w*ya + w*yb + z*y
   356  	 */
   357  	F = z * y  +  w * yb;
   358  	Fa = reducl(F);
   359  	Fb = F - Fa;
   360  
   361  	G = Fa + w * ya;
   362  	Ga = reducl(G);
   363  	Gb = G - Ga;
   364  
   365  	H = Fb + Gb;
   366  	Ha = reducl(H);
   367  	w = (Ga + Ha) * NXT;
   368  
   369  	/* Test the power of 2 for overflow */
   370  	if (w > MEXP)
   371  		return huge * huge;  /* overflow */
   372  	if (w < MNEXP)
   373  		return twom10000 * twom10000;  /* underflow */
   374  
   375  	e = w;
   376  	Hb = H - Ha;
   377  
   378  	if (Hb > 0.0) {
   379  		e += 1;
   380  		Hb -= 1.0/NXT;  /*0.0625L;*/
   381  	}
   382  
   383  	/* Now the product y * log2(x)  =  Hb + e/NXT.
   384  	 *
   385  	 * Compute base 2 exponential of Hb,
   386  	 * where -0.0625 <= Hb <= 0.
   387  	 */
   388  	z = Hb * __polevll(Hb, R, 6);  /*  z = 2**Hb - 1  */
   389  
   390  	/* Express e/NXT as an integer plus a negative number of (1/NXT)ths.
   391  	 * Find lookup table entry for the fractional power of 2.
   392  	 */
   393  	if (e < 0)
   394  		i = 0;
   395  	else
   396  		i = 1;
   397  	i = e/NXT + i;
   398  	e = NXT*i - e;
   399  	w = A[e];
   400  	z = w * z;  /*  2**-e * ( 1 + (2**Hb-1) )  */
   401  	z = z + w;
   402  	z = scalbnl(z, i);  /* multiply by integer power of 2 */
   403  
   404  	if (nflg)
   405  		z = -z;
   406  	return z;
   407  }
   408  
   409  
   410  /* Find a multiple of 1/NXT that is within 1/NXT of x. */
   411  static long double reducl(long double x)
   412  {
   413  	long double t;
   414  
   415  	t = x * NXT;
   416  	t = floorl(t);
   417  	t = t / NXT;
   418  	return t;
   419  }
   420  
   421  /*
   422   *      Positive real raised to integer power, long double precision
   423   *
   424   *
   425   * SYNOPSIS:
   426   *
   427   * long double x, y, powil();
   428   * int n;
   429   *
   430   * y = powil( x, n );
   431   *
   432   *
   433   * DESCRIPTION:
   434   *
   435   * Returns argument x>0 raised to the nth power.
   436   * The routine efficiently decomposes n as a sum of powers of
   437   * two. The desired power is a product of two-to-the-kth
   438   * powers of x.  Thus to compute the 32767 power of x requires
   439   * 28 multiplications instead of 32767 multiplications.
   440   *
   441   *
   442   * ACCURACY:
   443   *
   444   *                      Relative error:
   445   * arithmetic   x domain   n domain  # trials      peak         rms
   446   *    IEEE     .001,1000  -1022,1023  50000       4.3e-17     7.8e-18
   447   *    IEEE        1,2     -1022,1023  20000       3.9e-17     7.6e-18
   448   *    IEEE     .99,1.01     0,8700    10000       3.6e-16     7.2e-17
   449   *
   450   * Returns MAXNUM on overflow, zero on underflow.
   451   */
   452  
   453  static long double powil(long double x, int nn)
   454  {
   455  	long double ww, y;
   456  	long double s;
   457  	int n, e, sign, lx;
   458  
   459  	if (nn == 0)
   460  		return 1.0;
   461  
   462  	if (nn < 0) {
   463  		sign = -1;
   464  		n = -nn;
   465  	} else {
   466  		sign = 1;
   467  		n = nn;
   468  	}
   469  
   470  	/* Overflow detection */
   471  
   472  	/* Calculate approximate logarithm of answer */
   473  	s = x;
   474  	s = frexpl( s, &lx);
   475  	e = (lx - 1)*n;
   476  	if ((e == 0) || (e > 64) || (e < -64)) {
   477  		s = (s - 7.0710678118654752e-1L) / (s +  7.0710678118654752e-1L);
   478  		s = (2.9142135623730950L * s - 0.5 + lx) * nn * LOGE2L;
   479  	} else {
   480  		s = LOGE2L * e;
   481  	}
   482  
   483  	if (s > MAXLOGL)
   484  		return huge * huge;  /* overflow */
   485  
   486  	if (s < MINLOGL)
   487  		return twom10000 * twom10000;  /* underflow */
   488  	/* Handle tiny denormal answer, but with less accuracy
   489  	 * since roundoff error in 1.0/x will be amplified.
   490  	 * The precise demarcation should be the gradual underflow threshold.
   491  	 */
   492  	if (s < -MAXLOGL+2.0) {
   493  		x = 1.0/x;
   494  		sign = -sign;
   495  	}
   496  
   497  	/* First bit of the power */
   498  	if (n & 1)
   499  		y = x;
   500  	else
   501  		y = 1.0;
   502  
   503  	ww = x;
   504  	n >>= 1;
   505  	while (n) {
   506  		ww = ww * ww;   /* arg to the 2-to-the-kth power */
   507  		if (n & 1)     /* if that bit is set, then include in product */
   508  			y *= ww;
   509  		n >>= 1;
   510  	}
   511  
   512  	if (sign < 0)
   513  		y = 1.0/y;
   514  	return y;
   515  }
   516  #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
   517  // TODO: broken implementation to make things compile
   518  long double powl(long double x, long double y)
   519  {
   520  	return pow(x, y);
   521  }
   522  #endif