github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/math/exp_amd64.s (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #include "textflag.h" 6 7 // The method is based on a paper by Naoki Shibata: "Efficient evaluation 8 // methods of elementary functions suitable for SIMD computation", Proc. 9 // of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32 10 // (May 2010). The paper is available at 11 // https://link.springer.com/article/10.1007/s00450-010-0108-2 12 // 13 // The original code and the constants below are from the author's 14 // implementation available at http://freshmeat.net/projects/sleef. 15 // The README file says, "The software is in public domain. 16 // You can use the software without any obligation." 17 // 18 // This code is a simplified version of the original. 19 20 #define LN2 0.6931471805599453094172321214581766 // log_e(2) 21 #define LOG2E 1.4426950408889634073599246810018920 // 1/LN2 22 #define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2 23 #define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2 24 #define PosInf 0x7FF0000000000000 25 #define NegInf 0xFFF0000000000000 26 #define Overflow 7.09782712893384e+02 27 28 DATA exprodata<>+0(SB)/8, $0.5 29 DATA exprodata<>+8(SB)/8, $1.0 30 DATA exprodata<>+16(SB)/8, $2.0 31 DATA exprodata<>+24(SB)/8, $1.6666666666666666667e-1 32 DATA exprodata<>+32(SB)/8, $4.1666666666666666667e-2 33 DATA exprodata<>+40(SB)/8, $8.3333333333333333333e-3 34 DATA exprodata<>+48(SB)/8, $1.3888888888888888889e-3 35 DATA exprodata<>+56(SB)/8, $1.9841269841269841270e-4 36 DATA exprodata<>+64(SB)/8, $2.4801587301587301587e-5 37 GLOBL exprodata<>+0(SB), RODATA, $72 38 39 // func Exp(x float64) float64 40 TEXT ·archExp(SB),NOSPLIT,$0 41 // test bits for not-finite 42 MOVQ x+0(FP), BX 43 MOVQ $~(1<<63), AX // sign bit mask 44 MOVQ BX, DX 45 ANDQ AX, DX 46 MOVQ $PosInf, AX 47 CMPQ AX, DX 48 JLE notFinite 49 // check if argument will overflow 50 MOVQ BX, X0 51 MOVSD $Overflow, X1 52 COMISD X1, X0 53 JA overflow 54 MOVSD $LOG2E, X1 55 MULSD X0, X1 56 CVTSD2SL X1, BX // BX = exponent 57 CVTSL2SD BX, X1 58 CMPB ·useFMA(SB), $1 59 JE avxfma 60 MOVSD $LN2U, X2 61 MULSD X1, X2 62 SUBSD X2, X0 63 MOVSD $LN2L, X2 64 MULSD X1, X2 65 SUBSD X2, X0 66 // reduce argument 67 MULSD $0.0625, X0 68 // Taylor series evaluation 69 MOVSD exprodata<>+64(SB), X1 70 MULSD X0, X1 71 ADDSD exprodata<>+56(SB), X1 72 MULSD X0, X1 73 ADDSD exprodata<>+48(SB), X1 74 MULSD X0, X1 75 ADDSD exprodata<>+40(SB), X1 76 MULSD X0, X1 77 ADDSD exprodata<>+32(SB), X1 78 MULSD X0, X1 79 ADDSD exprodata<>+24(SB), X1 80 MULSD X0, X1 81 ADDSD exprodata<>+0(SB), X1 82 MULSD X0, X1 83 ADDSD exprodata<>+8(SB), X1 84 MULSD X1, X0 85 MOVSD exprodata<>+16(SB), X1 86 ADDSD X0, X1 87 MULSD X1, X0 88 MOVSD exprodata<>+16(SB), X1 89 ADDSD X0, X1 90 MULSD X1, X0 91 MOVSD exprodata<>+16(SB), X1 92 ADDSD X0, X1 93 MULSD X1, X0 94 MOVSD exprodata<>+16(SB), X1 95 ADDSD X0, X1 96 MULSD X1, X0 97 ADDSD exprodata<>+8(SB), X0 98 // return fr * 2**exponent 99 ldexp: 100 ADDL $0x3FF, BX // add bias 101 JLE denormal 102 CMPL BX, $0x7FF 103 JGE overflow 104 lastStep: 105 SHLQ $52, BX 106 MOVQ BX, X1 107 MULSD X1, X0 108 MOVSD X0, ret+8(FP) 109 RET 110 notFinite: 111 // test bits for -Inf 112 MOVQ $NegInf, AX 113 CMPQ AX, BX 114 JNE notNegInf 115 // -Inf, return 0 116 underflow: // return 0 117 MOVQ $0, ret+8(FP) 118 RET 119 overflow: // return +Inf 120 MOVQ $PosInf, BX 121 notNegInf: // NaN or +Inf, return x 122 MOVQ BX, ret+8(FP) 123 RET 124 denormal: 125 CMPL BX, $-52 126 JL underflow 127 ADDL $0x3FE, BX // add bias - 1 128 SHLQ $52, BX 129 MOVQ BX, X1 130 MULSD X1, X0 131 MOVQ $1, BX 132 JMP lastStep 133 134 avxfma: 135 MOVSD $LN2U, X2 136 VFNMADD231SD X2, X1, X0 137 MOVSD $LN2L, X2 138 VFNMADD231SD X2, X1, X0 139 // reduce argument 140 MULSD $0.0625, X0 141 // Taylor series evaluation 142 MOVSD exprodata<>+64(SB), X1 143 VFMADD213SD exprodata<>+56(SB), X0, X1 144 VFMADD213SD exprodata<>+48(SB), X0, X1 145 VFMADD213SD exprodata<>+40(SB), X0, X1 146 VFMADD213SD exprodata<>+32(SB), X0, X1 147 VFMADD213SD exprodata<>+24(SB), X0, X1 148 VFMADD213SD exprodata<>+0(SB), X0, X1 149 VFMADD213SD exprodata<>+8(SB), X0, X1 150 MULSD X1, X0 151 VADDSD exprodata<>+16(SB), X0, X1 152 MULSD X1, X0 153 VADDSD exprodata<>+16(SB), X0, X1 154 MULSD X1, X0 155 VADDSD exprodata<>+16(SB), X0, X1 156 MULSD X1, X0 157 VADDSD exprodata<>+16(SB), X0, X1 158 VFMADD213SD exprodata<>+8(SB), X1, X0 159 JMP ldexp