// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Path: gitee.com/quant1x/num@v0.3.2/math32/exp_arm64.s

#define	Ln2Hi	6.9313812256e-01
#define	Ln2Lo	9.0580006145e-06
#define	Log2e	1.4426950216e+00

// Range limits for float32 arguments. Both routines finish with an inline
// Ldexp that adds k straight into the 8-bit exponent field; that is only
// valid while the sum stays inside 0..254, so every input that would leave
// this range has to be answered by the guards at the top of each routine.
#define	Overflow	8.8722831726e+01	// largest float32 below ln(MaxFloat32)
#define	Underflow	-1.0397207642e+02	// smallest float32 that is >= -150*ln2
#define	Overflow2	1.2799999237e+02	// largest float32 below 128
#define	Underflow2	-1.490e+02		// 2**-149 is the smallest float32 denormal

#define	NearZero	0x31800000	// 2**-28
#define	PosInf	0x7f800000
#define	FracMask	0x07fffff
#define	C1	0x34000000	// 2**-23

// Polynomial coefficients, float32 precision.
#define	P1	1.6666667163e-01
#define	P2	-2.7777778450e-03
#define	P3	6.6137559770e-05
#define	P4	-1.6533901999e-06
#define	P5	4.1381369442e-08

// Exp returns e**x, the base-e exponential of x.
// This is an assembly implementation of the method used for function Exp in file exp.go.
//
// Special cases handled before the main computation:
//	x is NaN        -> NaN
//	x > Overflow    -> +Inf
//	x < Underflow   -> 0
//	|x| < NearZero  -> 1 + x
//
// Arguments and result are 4-byte float32 values: x at 0(FP), ret at 8(FP),
// hence an argument area of 12 bytes.
//
// func Exp(x float32) float32
TEXT ·Exp(SB),$0-12
	FMOVS	x+0(FP), F0	// F0 = x
	FCMPS	F0, F0
	BNE	isNaN		// x = NaN, return NaN
	FMOVS	$Overflow, F1
	FCMPS	F1, F0
	BGT	overflow	// x > Overflow, return PosInf
	FMOVS	$Underflow, F1
	FCMPS	F1, F0
	BLT	underflow	// x < Underflow, return 0
	MOVW	$NearZero, R0
	FMOVS	R0, F2
	FABSS	F0, F3
	FMOVS	$1.0, F1	// F1 = 1.0, also the default Ldexp multiplier m
	FCMPS	F2, F3
	BLT	nearzero	// fabs(x) < NearZero, return 1 + x
	// argument reduction, x = k*ln2 + r,  |r| <= 0.5*ln2
	// computed as r = hi - lo for extra precision.
	FMOVS	$Log2e, F2
	FMOVS	$0.5, F3
	FNMSUBS	F0, F3, F2, F4	// Log2e*x - 0.5
	FMADDS	F0, F3, F2, F3	// Log2e*x + 0.5
	FCMPS	$0.0, F0
	FCSELS	LT, F4, F3, F3	// F3 = k (round half away from zero, truncated below)
	FCVTZSS	F3, R1		// R1 = int(k)
	SCVTFS	R1, F3		// F3 = float32(int(k))
	FMOVS	$Ln2Hi, F4	// F4 = Ln2Hi
	FMOVS	$Ln2Lo, F5	// F5 = Ln2Lo
	FMSUBS	F3, F0, F4, F4	// F4 = hi = x - float32(int(k))*Ln2Hi
	FMULS	F3, F5		// F5 = lo = float32(int(k)) * Ln2Lo
	FSUBS	F5, F4, F6	// F6 = r = hi - lo
	FMULS	F6, F6, F7	// F7 = t = r * r
	// compute y
	FMOVS	$P5, F8		// F8 = P5
	FMOVS	$P4, F9		// F9 = P4
	FMADDS	F7, F9, F8, F13	// P4+t*P5
	FMOVS	$P3, F10	// F10 = P3
	FMADDS	F7, F10, F13, F13	// P3+t*(P4+t*P5)
	FMOVS	$P2, F11	// F11 = P2
	FMADDS	F7, F11, F13, F13	// P2+t*(P3+t*(P4+t*P5))
	FMOVS	$P1, F12	// F12 = P1
	FMADDS	F7, F12, F13, F13	// P1+t*(P2+t*(P3+t*(P4+t*P5)))
	FMSUBS	F7, F6, F13, F13	// F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
	FMOVS	$2.0, F14
	FSUBS	F13, F14	// F14 = 2 - c
	FMULS	F6, F13, F15	// F15 = r*c
	FDIVS	F14, F15	// F15 = (r*c)/(2-c)
	FSUBS	F15, F5, F15	// lo-(r*c)/(2-c)
	FSUBS	F4, F15, F15	// (lo-(r*c)/(2-c))-hi
	FSUBS	F15, F1, F16	// F16 = y = 1-((lo-(r*c)/(2-c))-hi)
	// inline Ldexp(y, k), benefit:
	// 1, no parameter pass overhead.
	// 2, skip unnecessary checks for Inf/NaN/Zero
	// The range guards above keep exponent+k inside the 8-bit field.
	FMOVS	F16, R0
	ANDW	$FracMask, R0, R2	// fraction
	LSRW	$23, R0, R5	// exponent (y > 0, so the sign bit is clear)
	ADDW	R1, R5		// R5 = exponent + int(k)
	CMPW	$1, R5
	BGE	normal
	ADDW	$23, R5		// denormal: build y*2**(k+23) and scale back below
	MOVW	$C1, R8
	FMOVS	R8, F1		// m = 2**-23
normal:
	ORRW	R5<<23, R2, R0
	FMOVS	R0, F0
	FMULS	F1, F0		// return m * x
	FMOVS	F0, ret+8(FP)
	RET
nearzero:
	FADDS	F1, F0		// 1 + x
isNaN:
	FMOVS	F0, ret+8(FP)
	RET
underflow:
	MOVW	ZR, ret+8(FP)
	RET
overflow:
	MOVW	$PosInf, R0
	MOVW	R0, ret+8(FP)
	RET


// Exp2 returns 2**x, the base-2 exponential of x.
// This is an assembly implementation of the method used for function Exp2 in file exp.go.
//
// Special cases handled before the main computation:
//	x is NaN        -> NaN
//	x > Overflow2   -> +Inf
//	x < Underflow2  -> 0
//
// Arguments and result are 4-byte float32 values: x at 0(FP), ret at 8(FP),
// hence an argument area of 12 bytes.
//
// func Exp2(x float32) float32

// Float32 range limits for Exp2, pinned next to the code that relies on them.
// The inline Ldexp below adds k straight into the 8-bit exponent field, which
// is only valid while exponent+k stays inside 0..254; inputs outside
// [Underflow2, Overflow2] must therefore be answered by the guards.
#undef	Overflow2
#undef	Underflow2
#define	Overflow2	1.2799999237e+02	// largest float32 below 128
#define	Underflow2	-1.490e+02		// 2**-149 is the smallest float32 denormal

TEXT ·Exp2(SB),$0-12
	FMOVS	x+0(FP), F0	// F0 = x
	FCMPS	F0, F0
	BNE	isNaN		// x = NaN, return NaN
	FMOVS	$Overflow2, F1
	FCMPS	F1, F0
	BGT	overflow	// x > Overflow2, return PosInf
	FMOVS	$Underflow2, F1
	FCMPS	F1, F0
	BLT	underflow	// x < Underflow2, return 0
	// argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
	// computed as r = hi - lo for extra precision.
	FMOVS	$0.5, F2
	FSUBS	F2, F0, F3	// x - 0.5
	FADDS	F2, F0, F4	// x + 0.5
	FCMPS	$0.0, F0
	FCSELS	LT, F3, F4, F3	// F3 = k: x - 0.5 when x < 0, else x + 0.5 (truncated below)
	FCVTZSS	F3, R1		// R1 = int(k)
	SCVTFS	R1, F3		// F3 = float32(int(k))
	FSUBS	F3, F0, F3	// t = x - float32(int(k))
	FMOVS	$Ln2Hi, F4	// F4 = Ln2Hi
	FMOVS	$Ln2Lo, F5	// F5 = Ln2Lo
	FMULS	F3, F4		// F4 = hi = t * Ln2Hi
	FNMULS	F3, F5		// F5 = lo = -t * Ln2Lo
	FSUBS	F5, F4, F6	// F6 = r = hi - lo
	FMULS	F6, F6, F7	// F7 = t = r * r
	// compute y
	FMOVS	$P5, F8		// F8 = P5
	FMOVS	$P4, F9		// F9 = P4
	FMADDS	F7, F9, F8, F13	// P4+t*P5
	FMOVS	$P3, F10	// F10 = P3
	FMADDS	F7, F10, F13, F13	// P3+t*(P4+t*P5)
	FMOVS	$P2, F11	// F11 = P2
	FMADDS	F7, F11, F13, F13	// P2+t*(P3+t*(P4+t*P5))
	FMOVS	$P1, F12	// F12 = P1
	FMADDS	F7, F12, F13, F13	// P1+t*(P2+t*(P3+t*(P4+t*P5)))
	FMSUBS	F7, F6, F13, F13	// F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
	FMOVS	$2.0, F14
	FSUBS	F13, F14	// F14 = 2 - c
	FMULS	F6, F13, F15	// F15 = r*c
	FDIVS	F14, F15	// F15 = (r*c)/(2-c)
	FMOVS	$1.0, F1	// F1 = 1.0, also the default Ldexp multiplier m
	FSUBS	F15, F5, F15	// lo-(r*c)/(2-c)
	FSUBS	F4, F15, F15	// (lo-(r*c)/(2-c))-hi
	FSUBS	F15, F1, F16	// F16 = y = 1-((lo-(r*c)/(2-c))-hi)
	// inline Ldexp(y, k), benefit:
	// 1, no parameter pass overhead.
	// 2, skip unnecessary checks for Inf/NaN/Zero
	// The range guards above keep exponent+k inside the 8-bit field.
	FMOVS	F16, R0
	ANDW	$FracMask, R0, R2	// fraction
	LSRW	$23, R0, R5	// exponent (y > 0, so the sign bit is clear)
	ADDW	R1, R5		// R5 = exponent + int(k)
	CMPW	$1, R5
	BGE	normal
	ADDW	$23, R5		// denormal: build y*2**(k+23) and scale back below
	MOVW	$C1, R8
	FMOVS	R8, F1		// m = 2**-23
normal:
	ORRW	R5<<23, R2, R0
	FMOVS	R0, F0
	FMULS	F1, F0		// return m * x
isNaN:
	FMOVS	F0, ret+8(FP)
	RET
underflow:
	MOVW	ZR, ret+8(FP)
	RET
overflow:
	MOVW	$PosInf, R0
	MOVW	R0, ret+8(FP)
	RET