// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Origin: github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/math/exp_amd64.s

#include "textflag.h"

// The method is based on a paper by Naoki Shibata: "Efficient evaluation
// methods of elementary functions suitable for SIMD computation", Proc.
// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32
// (May 2010). The paper is available at
// http://www.springerlink.com/content/340228x165742104/
//
// The original code and the constants below are from the author's
// implementation available at http://freshmeat.net/projects/sleef.
// The README file says, "The software is in public domain.
// You can use the software without any obligation."
//
// This code is a simplified version of the original.

#define LN2 0.6931471805599453094172321214581766 // log_e(2)
#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2
#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2
#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2
#define PosInf 0x7FF0000000000000
#define NegInf 0xFFF0000000000000
#define Overflow 7.09782712893384e+02

// Read-only constant table used by the polynomial evaluation:
//   +0  = 1/2!   +8  = 1.0    +16 = 2.0
//   +24 = 1/3!   +32 = 1/4!   +40 = 1/5!
//   +48 = 1/6!   +56 = 1/7!   +64 = 1/8!
DATA exprodata<>+0(SB)/8, $0.5
DATA exprodata<>+8(SB)/8, $1.0
DATA exprodata<>+16(SB)/8, $2.0
DATA exprodata<>+24(SB)/8, $1.6666666666666666667e-1
DATA exprodata<>+32(SB)/8, $4.1666666666666666667e-2
DATA exprodata<>+40(SB)/8, $8.3333333333333333333e-3
DATA exprodata<>+48(SB)/8, $1.3888888888888888889e-3
DATA exprodata<>+56(SB)/8, $1.9841269841269841270e-4
DATA exprodata<>+64(SB)/8, $2.4801587301587301587e-5
GLOBL exprodata<>+0(SB), RODATA, $72

// func Exp(x float64) float64
//
// Computes e**x as fr * 2**k, where k = round(x * LOG2E) and
// fr = e**(x - k*LN2).
//
// Special cases handled explicitly:
//   Exp(-Inf)          = 0
//   Exp(+Inf)          = +Inf
//   Exp(NaN)           = NaN (the argument bits are returned unchanged)
//   Exp(x > Overflow)  = +Inf
//
// Register roles:
//   BX     raw bits of x on entry, then the integer exponent k
//   X0     x, then the reduced argument, then fr / the result
//   X1, X2 scratch
//
// The scaling step handles the two ends of the exponent range separately so
// that subnormal results are produced instead of 0, and finite results with
// k == 1024 (fr < 1) are produced instead of +Inf.
TEXT ·Exp(SB),NOSPLIT,$0
	// test bits for not-finite
	MOVQ	x+0(FP), BX
	MOVQ	$~(1<<63), AX		// sign bit mask
	MOVQ	BX, DX
	ANDQ	AX, DX			// DX = bits of |x|
	MOVQ	$PosInf, AX
	CMPQ	AX, DX
	JLE	notFinite		// PosInf <= |x| bits: Inf or NaN
	// check if argument will overflow
	MOVQ	BX, X0
	MOVSD	$Overflow, X1
	COMISD	X1, X0
	JA	overflow		// x > Overflow
	// k = round(x * LOG2E), kept both as integer (BX) and float64 (X1)
	MOVSD	$LOG2E, X1
	MULSD	X0, X1
	CVTSD2SL	X1, BX		// BX = exponent
	CVTSL2SD	BX, X1
	CMPB	·useFMA(SB), $1
	JE	avxfma
	// X0 = x - k*LN2U - k*LN2L (LN2 split in two parts for precision)
	MOVSD	$LN2U, X2
	MULSD	X1, X2
	SUBSD	X2, X0
	MOVSD	$LN2L, X2
	MULSD	X1, X2
	SUBSD	X2, X0
	// reduce argument: s = X0 / 16
	MULSD	$0.0625, X0
	// Taylor series evaluation (Horner form):
	// X1 = 1 + s/2! + s**2/3! + ... + s**7/8!
	MOVSD	exprodata<>+64(SB), X1
	MULSD	X0, X1
	ADDSD	exprodata<>+56(SB), X1
	MULSD	X0, X1
	ADDSD	exprodata<>+48(SB), X1
	MULSD	X0, X1
	ADDSD	exprodata<>+40(SB), X1
	MULSD	X0, X1
	ADDSD	exprodata<>+32(SB), X1
	MULSD	X0, X1
	ADDSD	exprodata<>+24(SB), X1
	MULSD	X0, X1
	ADDSD	exprodata<>+0(SB), X1
	MULSD	X0, X1
	ADDSD	exprodata<>+8(SB), X1
	MULSD	X1, X0			// X0 = e**s - 1
	// Undo the division by 16 with four squaring steps on t = e**y - 1:
	// t*(t+2) = e**(2y) - 1.
	MOVSD	exprodata<>+16(SB), X1
	ADDSD	X0, X1
	MULSD	X1, X0
	MOVSD	exprodata<>+16(SB), X1
	ADDSD	X0, X1
	MULSD	X1, X0
	MOVSD	exprodata<>+16(SB), X1
	ADDSD	X0, X1
	MULSD	X1, X0
	MOVSD	exprodata<>+16(SB), X1
	ADDSD	X0, X1
	MULSD	X1, X0
	ADDSD	exprodata<>+8(SB), X0	// X0 = fr
	// return fr * 2**exponent
lastStep:
	ADDL	$0x3FF, BX		// BX = biased exponent
	JLE	denormal		// result is below the normal range
	CMPL	BX, $0x7FF
	JGT	overflow		// defensive: the Overflow check above should make this unreachable
	JEQ	huge			// k == 1024: 2**1024 itself is not representable
	SHLQ	$52, BX			// build the float64 2**k from its exponent field
	MOVQ	BX, X1
	MULSD	X1, X0
	MOVSD	X0, ret+8(FP)
	RET
notFinite:
	// test bits for -Inf
	MOVQ	$NegInf, AX
	CMPQ	AX, BX
	JNE	notNegInf
	// -Inf, return 0
underflow:	// return 0
	MOVQ	$0, AX
	MOVQ	AX, ret+8(FP)
	RET
overflow:	// return +Inf
	MOVQ	$PosInf, BX
notNegInf:	// NaN or +Inf, return x
	MOVQ	BX, ret+8(FP)
	RET
huge:
	// k == 1024. fr may be below 1, in which case fr * 2**1024 is still
	// finite, so scale by 2**1023 and then double. The doubling yields
	// +Inf by itself when the result really overflows.
	MOVQ	$0x7FE0000000000000, BX	// 2**1023
	MOVQ	BX, X1
	MULSD	X1, X0
	ADDSD	X0, X0
	MOVSD	X0, ret+8(FP)
	RET
denormal:
	// Biased exponent BX <= 0. Below -52 the result rounds to zero;
	// otherwise scale in two steps so the only rounding happens in the
	// final multiplication into the subnormal range.
	CMPL	BX, $-52
	JLT	underflow
	ADDL	$0x3FE, BX		// biased exponent of 2**(k+1022), in [970, 1022]
	SHLQ	$52, BX
	MOVQ	BX, X1
	MULSD	X1, X0			// X0 = fr * 2**(k+1022), still a normal number
	MOVQ	$0x0010000000000000, BX	// 2**-1022
	MOVQ	BX, X1
	MULSD	X1, X0			// X0 = fr * 2**k
	MOVSD	X0, ret+8(FP)
	RET

avxfma:
	// Same computation as the path above, using fused multiply-add.
	// X0 = x - k*LN2U - k*LN2L
	MOVSD	$LN2U, X2
	VFNMADD231SD	X2, X1, X0
	MOVSD	$LN2L, X2
	VFNMADD231SD	X2, X1, X0
	// reduce argument
	MULSD	$0.0625, X0
	// Taylor series evaluation
	MOVSD	exprodata<>+64(SB), X1
	VFMADD213SD	exprodata<>+56(SB), X0, X1
	VFMADD213SD	exprodata<>+48(SB), X0, X1
	VFMADD213SD	exprodata<>+40(SB), X0, X1
	VFMADD213SD	exprodata<>+32(SB), X0, X1
	VFMADD213SD	exprodata<>+24(SB), X0, X1
	VFMADD213SD	exprodata<>+0(SB), X0, X1
	VFMADD213SD	exprodata<>+8(SB), X0, X1
	MULSD	X1, X0
	// four squaring steps; the last one also adds 1.0 to produce fr
	VADDSD	exprodata<>+16(SB), X0, X1
	MULSD	X1, X0
	VADDSD	exprodata<>+16(SB), X0, X1
	MULSD	X1, X0
	VADDSD	exprodata<>+16(SB), X0, X1
	MULSD	X1, X0
	VADDSD	exprodata<>+16(SB), X0, X1
	VFMADD213SD	exprodata<>+8(SB), X1, X0
	JMP	lastStep