github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/math/sincos_amd64.s (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #include "textflag.h" 6 7 // The method is based on a paper by Naoki Shibata: "Efficient evaluation 8 // methods of elementary functions suitable for SIMD computation", Proc. 9 // of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32 10 // (May 2010). The paper is available at 11 // http://www.springerlink.com/content/340228x165742104/ 12 // 13 // The original code and the constants below are from the author's 14 // implementation available at http://freshmeat.net/projects/sleef. 15 // The README file says, "The software is in public domain. 16 // You can use the software without any obligation." 17 // 18 // This code is a simplified version of the original. 19 20 #define PosOne 0x3FF0000000000000 21 #define PosInf 0x7FF0000000000000 22 #define NaN 0x7FF8000000000001 23 #define PI4A 0.7853981554508209228515625 // pi/4 split into three parts 24 #define PI4B 0.794662735614792836713604629039764404296875e-8 25 #define PI4C 0.306161699786838294306516483068750264552437361480769e-16 26 #define M4PI 1.273239544735162542821171882678754627704620361328125 // 4/pi 27 #define T0 1.0 28 #define T1 -8.33333333333333333333333e-02 // (-1.0/12) 29 #define T2 2.77777777777777777777778e-03 // (+1.0/360) 30 #define T3 -4.96031746031746031746032e-05 // (-1.0/20160) 31 #define T4 5.51146384479717813051146e-07 // (+1.0/1814400) 32 33 // func Sincos(d float64) (sin, cos float64) 34 TEXT ·Sincos(SB),NOSPLIT,$0 35 // test for special cases 36 MOVQ $~(1<<63), DX // sign bit mask 37 MOVQ x+0(FP), BX 38 ANDQ BX, DX 39 JEQ isZero 40 MOVQ $PosInf, AX 41 CMPQ AX, DX 42 JLE isInfOrNaN 43 // Reduce argument 44 MOVQ BX, X7 // x7= d 45 MOVQ DX, X0 // x0= |d| 46 MOVSD $M4PI, X2 47 MULSD X0, X2 48 CVTTSD2SQ X2, BX // bx= q 49 MOVQ $1, AX 50 ANDQ BX, AX 51 ADDQ BX, AX 52 CVTSQ2SD AX, X2 53 MOVSD $PI4A, X3 54 MULSD X2, X3 55 SUBSD X3, X0 56 MOVSD $PI4B, X3 57 MULSD X2, X3 58 SUBSD X3, X0 59 MOVSD $PI4C, X3 60 MULSD X2, X3 61 SUBSD X3, X0 62 MULSD $0.125, X0 // x0= x, x7= d, bx= q 63 // Evaluate Taylor series 64 MULSD X0, X0 65 MOVSD $T4, X2 66 MULSD X0, X2 67 ADDSD $T3, X2 68 MULSD X0, X2 69 ADDSD $T2, X2 70 MULSD X0, X2 71 ADDSD $T1, X2 72 MULSD X0, X2 73 ADDSD $T0, X2 74 MULSD X2, X0 // x0= x, x7= d, bx= q 75 // Apply double angle formula 76 MOVSD $4.0, X2 77 SUBSD X0, X2 78 MULSD X2, X0 79 MOVSD $4.0, X2 80 SUBSD X0, X2 81 MULSD X2, X0 82 MOVSD $4.0, X2 83 SUBSD X0, X2 84 MULSD X2, X0 85 MULSD $0.5, X0 // x0= x, x7= d, bx= q 86 // sin = sqrt((2 - x) * x) 87 MOVSD $2.0, X2 88 SUBSD X0, X2 89 MULSD X0, X2 90 SQRTSD X2, X2 // x0= x, x2= z, x7= d, bx= q 91 // cos = 1 - x 92 MOVSD $1.0, X1 93 SUBSD X0, X1 // x1= x, x2= z, x7= d, bx= q 94 // if ((q + 1) & 2) != 0 { sin, cos = cos, sin } 95 MOVQ $1, DX 96 ADDQ BX, DX 97 ANDQ $2, DX 98 SHRQ $1, DX 99 SUBQ $1, DX 100 MOVQ DX, X3 101 // sin = (y & z) | (^y & x) 102 MOVAPD X2, X0 103 ANDPD X3, X0 // x0= sin 104 MOVAPD X3, X4 105 ANDNPD X1, X4 106 ORPD X4, X0 // x0= sin, x1= x, x2= z, x3= y, x7= d, bx= q 107 // cos = (y & x) | (^y & z) 108 ANDPD X3, X1 // x1= cos 109 ANDNPD X2, X3 110 ORPD X3, X1 // x0= sin, x1= cos, x7= d, bx= q 111 // if ((q & 4) != 0) != (d < 0) { sin = -sin } 112 MOVQ BX, AX 113 MOVQ $61, CX 114 SHLQ CX, AX 115 MOVQ AX, X3 116 XORPD X7, X3 117 MOVQ $(1<<63), AX 118 MOVQ AX, X2 // x2= -0.0 119 ANDPD X2, X3 120 ORPD X3, X0 // x0= sin, x1= cos, x2= -0.0, bx= q 121 // if ((q + 2) & 4) != 0 { cos = -cos } 122 MOVQ $2, AX 123 ADDQ AX, BX 124 MOVQ $61, CX 125 SHLQ CX, BX 126 MOVQ BX, X3 127 ANDPD X2, X3 128 ORPD X3, X1 // x0= sin, x1= cos 129 // return (sin, cos) 130 MOVSD X0, sin+8(FP) 131 MOVSD X1, cos+16(FP) 132 RET 133 isZero: // return (±0.0, 1.0) 134 MOVQ BX, sin+8(FP) 135 MOVQ $PosOne, AX 136 MOVQ AX, cos+16(FP) 137 RET 138 isInfOrNaN: // return (NaN, NaN) 139 MOVQ $NaN, AX 140 MOVQ AX, sin+8(FP) 141 MOVQ AX, cos+16(FP) 142 RET