gitee.com/quant1x/num@v0.3.2/math32/exp_arm64.s (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #define	Ln2Hi	6.9313812256e-01	// high part of ln 2 (0x3f317180)
     6  #define	Ln2Lo	9.0580006145e-06	// low part of ln 2 (0x3717f7d1)
     7  #define	Log2e	1.4426950216e+00	// 1/ln 2 (0x3fb8aa3b)
     8  #define	Overflow	8.8722832e+01	// 0x42b17217, largest float32 x with finite e**x; the inlined Ldexp does not range-check
     9  #define	Underflow	-1.03972077e+02	// 0xc2cff1b4, about ln(2**-150); e**x is returned as 0 below this
    10  #define	Overflow2	1.2799999e+02	// 0x42ffffff, largest float32 below 128 (2**128 is not a finite float32)
    11  #define	Underflow2	-1.4900e+02	// 2**-149 is the smallest float32 denormal
    12  #define	NearZero	0x317fffff	// largest float32 below 2**-28
    13  #define	PosInf	0x7f800000
    14  #define	FracMask	0x07fffff
    15  #define	C1	0x34000000	// 2**-23
    16  #define	P1	1.6666667163e-01	// 0x3e2aaaab
    17  #define	P2	-2.7777778450e-03	// 0xbb360b61
    18  #define	P3	6.6137559770e-05	// 0x388ab355
    19  #define	P4	-1.6533901999e-06	// 0xb5ddea0e
    20  #define	P5	4.1381369442e-08	// 0x3331bb4c
    21  
    22  // Exp returns e**x, the base-e exponential of x.
    23  // This is an assembly implementation of the method used for function Exp in file exp.go.
    24  // Special cases: NaN -> NaN; x > Overflow -> +Inf; x < Underflow -> 0; |x| < NearZero -> 1 + x.
    25  // func Exp(x float32) float32
    26  TEXT ·Exp(SB),$0-12	// x is at 0(FP), the result at 8(FP): 12 argument bytes
    27  	FMOVS	x+0(FP), F0	// F0 = x
    28  	FCMPS	F0, F0
    29  	BNE	isNaN		// x != x only for NaN, return NaN
    30  	FMOVS	$Overflow, F1
    31  	FCMPS	F1, F0
    32  	BGT	overflow	// x > Overflow, return PosInf
    33  	FMOVS	$Underflow, F1
    34  	FCMPS	F1, F0
    35  	BLT	underflow	// x < Underflow, return 0
    36  	MOVW	$NearZero, R0
    37  	FMOVS	R0, F2		// F2 = NearZero (bit pattern moved from R0)
    38  	FABSS	F0, F3		// F3 = fabs(x)
    39  	FMOVS	$1.0, F1	// F1 = 1.0, also the default scale m for the final multiply
    40  	FCMPS	F2, F3
    41  	BLT	nearzero	// fabs(x) < NearZero, return 1 + x
    42  	// argument reduction, x = k*ln2 + r,  |r| <= 0.5*ln2
    43  	// computed as r = hi - lo for extra precision.
    44  	FMOVS	$Log2e, F2
    45  	FMOVS	$0.5, F3
    46  	FNMSUBS	F0, F3, F2, F4	// Log2e*x - 0.5
    47  	FMADDS	F0, F3, F2, F3	// Log2e*x + 0.5
    48  	FCMPS	$0.0, F0
    49  	FCSELS	LT, F4, F3, F3	// F3 = k: the -0.5 form when x < 0, the +0.5 form otherwise
    50  	FCVTZSS	F3, R1		// R1 = int(k)
    51  	SCVTFS	R1, F3		// F3 = float32(int(k))
    52  	FMOVS	$Ln2Hi, F4	// F4 = Ln2Hi
    53  	FMOVS	$Ln2Lo, F5	// F5 = Ln2Lo
    54  	FMSUBS	F3, F0, F4, F4	// F4 = hi = x - float32(int(k))*Ln2Hi
    55  	FMULS	F3, F5		// F5 = lo = float32(int(k)) * Ln2Lo
    56  	FSUBS	F5, F4, F6	// F6 = r = hi - lo
    57  	FMULS	F6, F6, F7	// F7 = t = r * r
    58  	// compute y
    59  	FMOVS	$P5, F8		// F8 = P5
    60  	FMOVS	$P4, F9		// F9 = P4
    61  	FMADDS	F7, F9, F8, F13	// P4+t*P5
    62  	FMOVS	$P3, F10	// F10 = P3
    63  	FMADDS	F7, F10, F13, F13	// P3+t*(P4+t*P5)
    64  	FMOVS	$P2, F11	// F11 = P2
    65  	FMADDS	F7, F11, F13, F13	// P2+t*(P3+t*(P4+t*P5))
    66  	FMOVS	$P1, F12	// F12 = P1
    67  	FMADDS	F7, F12, F13, F13	// P1+t*(P2+t*(P3+t*(P4+t*P5)))
    68  	FMSUBS	F7, F6, F13, F13	// F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
    69  	FMOVS	$2.0, F14
    70  	FSUBS	F13, F14	// F14 = 2-c
    71  	FMULS	F6, F13, F15	// F15 = r*c
    72  	FDIVS	F14, F15	// F15 = (r*c)/(2-c)
    73  	FSUBS	F15, F5, F15	// lo-(r*c)/(2-c)
    74  	FSUBS	F4, F15, F15	// (lo-(r*c)/(2-c))-hi
    75  	FSUBS	F15, F1, F16	// F16 = y = 1-((lo-(r*c)/(2-c))-hi)
    76  	// inline Ldexp(y, k), benefit:
    77  	// 1, no parameter pass overhead.
    78  	// 2, skip unnecessary checks for Inf/NaN/Zero
    79  	FMOVS	F16, R0		// R0 = bit pattern of y
    80  	ANDW	$FracMask, R0, R2	// R2 = fraction bits of y
    81  	LSRW	$23, R0, R5	// R5 = exponent field of y
    82  	ADDW	R1, R5		// R5 = exponent field + int(k); 32-bit add, flags come from CMPW below
    83  	CMPW	$1, R5
    84  	BGE	normal
    85  	ADDW	$23, R5		// denormal: raise the exponent by 23, undone by m below
    86  	MOVW	$C1, R8
    87  	FMOVS	R8, F1		// m = 2**-23 (F1 was 1.0 until here)
    88  normal:
    89  	ORRW	R5<<23, R2, R0	// R0 = new exponent field | fraction
    90  	FMOVS	R0, F0
    91  	FMULS	F1, F0		// return m * x
    92  	FMOVS	F0, ret+8(FP)
    93  	RET
    94  nearzero:
    95  	FADDS	F1, F0		// F0 = 1 + x
    96  isNaN:
    97  	FMOVS	F0, ret+8(FP)	// shared exit: F0 is NaN, or 1 + x when reached from nearzero
    98  	RET
    99  underflow:
   100  	MOVW	ZR, ret+8(FP)	// all-zero bits = +0
   101  	RET
   102  overflow:
   103  	MOVW	$PosInf, R0
   104  	MOVW	R0, ret+8(FP)
   105  	RET
   106  
   107  
   108  // Exp2 returns 2**x, the base-2 exponential of x.
   109  // This is an assembly implementation of the method used for function Exp2 in file exp.go.
   110  // Special cases: NaN -> NaN; x > Overflow2 -> +Inf; x < Underflow2 -> 0.
   111  // func Exp2(x float32) float32
   112  TEXT ·Exp2(SB),$0-12	// x is at 0(FP), the result at 8(FP): 12 argument bytes
   113  	FMOVS	x+0(FP), F0	// F0 = x
   114  	FCMPS	F0, F0
   115  	BNE	isNaN		// x != x only for NaN, return NaN
   116  	FMOVS	$Overflow2, F1
   117  	FCMPS	F1, F0
   118  	BGT	overflow	// x > Overflow2, return PosInf
   119  	FMOVS	$Underflow2, F1
   120  	FCMPS	F1, F0
   121  	BLT	underflow	// x < Underflow2, return 0
   122  	// argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
   123  	// computed as r = hi - lo for extra precision.
   124  	FMOVS	$0.5, F2
   125  	FSUBS	F2, F0, F3	// x - 0.5
   126  	FADDS	F2, F0, F4	// x + 0.5
   127  	FCMPS	$0.0, F0
   128  	FCSELS	LT, F3, F4, F3	// F3 = k: x - 0.5 when x < 0, x + 0.5 otherwise
   129  	FCVTZSS	F3, R1		// R1 = int(k)
   130  	SCVTFS	R1, F3		// F3 = float32(int(k))
   131  	FSUBS	F3, F0, F3	// t = x - float32(int(k))
   132  	FMOVS	$Ln2Hi, F4	// F4 = Ln2Hi
   133  	FMOVS	$Ln2Lo, F5	// F5 = Ln2Lo
   134  	FMULS	F3, F4		// F4 = hi = t * Ln2Hi
   135  	FNMULS	F3, F5		// F5 = lo = -t * Ln2Lo
   136  	FSUBS	F5, F4, F6	// F6 = r = hi - lo
   137  	FMULS	F6, F6, F7	// F7 = t = r * r
   138  	// compute y
   139  	FMOVS	$P5, F8		// F8 = P5
   140  	FMOVS	$P4, F9		// F9 = P4
   141  	FMADDS	F7, F9, F8, F13	// P4+t*P5
   142  	FMOVS	$P3, F10	// F10 = P3
   143  	FMADDS	F7, F10, F13, F13	// P3+t*(P4+t*P5)
   144  	FMOVS	$P2, F11	// F11 = P2
   145  	FMADDS	F7, F11, F13, F13	// P2+t*(P3+t*(P4+t*P5))
   146  	FMOVS	$P1, F12	// F12 = P1
   147  	FMADDS	F7, F12, F13, F13	// P1+t*(P2+t*(P3+t*(P4+t*P5)))
   148  	FMSUBS	F7, F6, F13, F13	// F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
   149  	FMOVS	$2.0, F14
   150  	FSUBS	F13, F14	// F14 = 2-c
   151  	FMULS	F6, F13, F15	// F15 = r*c
   152  	FDIVS	F14, F15	// F15 = (r*c)/(2-c)
   153  	FMOVS	$1.0, F1	// F1 = 1.0, also the default scale m for the final multiply
   154  	FSUBS	F15, F5, F15	// lo-(r*c)/(2-c)
   155  	FSUBS	F4, F15, F15	// (lo-(r*c)/(2-c))-hi
   156  	FSUBS	F15, F1, F16	// F16 = y = 1-((lo-(r*c)/(2-c))-hi)
   157  	// inline Ldexp(y, k), benefit:
   158  	// 1, no parameter pass overhead.
   159  	// 2, skip unnecessary checks for Inf/NaN/Zero
   160  	FMOVS	F16, R0		// R0 = bit pattern of y
   161  	ANDW	$FracMask, R0, R2	// R2 = fraction bits of y
   162  	LSRW	$23, R0, R5	// R5 = exponent field of y
   163  	ADDW	R1, R5		// R5 = exponent field + int(k); 32-bit add, flags come from CMPW below
   164  	CMPW	$1, R5
   165  	BGE	normal
   166  	ADDW	$23, R5		// denormal: raise the exponent by 23, undone by m below
   167  	MOVW	$C1, R8
   168  	FMOVS	R8, F1		// m = 2**-23 (F1 was 1.0 until here)
   169  normal:
   170  	ORRW	R5<<23, R2, R0	// R0 = new exponent field | fraction
   171  	FMOVS	R0, F0
   172  	FMULS	F1, F0		// return m * x
   173  isNaN:
   174  	FMOVS	F0, ret+8(FP)	// shared exit: F0 is the computed result, or the NaN input
   175  	RET
   176  underflow:
   177  	MOVW	ZR, ret+8(FP)	// all-zero bits = +0
   178  	RET
   179  overflow:
   180  	MOVW	$PosInf, R0
   181  	MOVW	R0, ret+8(FP)
   182  	RET