github.com/likebike/go--@v0.0.0-20190911215757-0bd925d16e96/go/src/math/atan2_s390x.s (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #include "textflag.h" 6 7 #define PosInf 0x7FF0000000000000 8 #define NegInf 0xFFF0000000000000 9 #define NegZero 0x8000000000000000 10 #define Pi 0x400921FB54442D18 11 #define NegPi 0xC00921FB54442D18 12 #define Pi3Div4 0x4002D97C7F3321D2 // 3Pi/4 13 #define NegPi3Div4 0xC002D97C7F3321D2 // -3Pi/4 14 #define PiDiv4 0x3FE921FB54442D18 // Pi/4 15 #define NegPiDiv4 0xBFE921FB54442D18 // -Pi/4 16 17 // Minimax polynomial coefficients and other constants 18 DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00 19 DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00 20 DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00 21 DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00 22 DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01 23 DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01 24 DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01 25 DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01 26 DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01 27 DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01 28 DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01 29 DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01 30 DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01 31 DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01 32 DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02 33 DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04 34 DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02 35 DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03 36 DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03 37 DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01 38 GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160 39 40 DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b 41 DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b 42 DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b 43 DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b 44 GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32 45 DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000 46 GLOBL ·atan2xpim<> + 0(SB), RODATA, $8 47 48 // Atan2 returns the arc tangent of y/x, using 49 // the signs of the two to determine the quadrant 50 // of the return value. 51 // 52 // Special cases are (in order): 53 // Atan2(y, NaN) = NaN 54 // Atan2(NaN, x) = NaN 55 // Atan2(+0, x>=0) = +0 56 // Atan2(-0, x>=0) = -0 57 // Atan2(+0, x<=-0) = +Pi 58 // Atan2(-0, x<=-0) = -Pi 59 // Atan2(y>0, 0) = +Pi/2 60 // Atan2(y<0, 0) = -Pi/2 61 // Atan2(+Inf, +Inf) = +Pi/4 62 // Atan2(-Inf, +Inf) = -Pi/4 63 // Atan2(+Inf, -Inf) = 3Pi/4 64 // Atan2(-Inf, -Inf) = -3Pi/4 65 // Atan2(y, +Inf) = 0 66 // Atan2(y>0, -Inf) = +Pi 67 // Atan2(y<0, -Inf) = -Pi 68 // Atan2(+Inf, x) = +Pi/2 69 // Atan2(-Inf, x) = -Pi/2 70 // The algorithm used is minimax polynomial approximation 71 // with coefficients determined with a Remez exchange algorithm. 72 73 TEXT ·atan2Asm(SB), NOSPLIT, $0-24 74 // special case 75 MOVD x+0(FP), R1 76 MOVD y+8(FP), R2 77 78 // special case Atan2(NaN, y) = NaN 79 MOVD $~(1<<63), R5 80 AND R1, R5 // x = |x| 81 MOVD $PosInf, R3 82 CMPUBLT R3, R5, returnX 83 84 // special case Atan2(x, NaN) = NaN 85 MOVD $~(1<<63), R5 86 AND R2, R5 87 CMPUBLT R3, R5, returnY 88 89 MOVD $NegZero, R3 90 CMPUBEQ R3, R1, xIsNegZero 91 92 MOVD $0, R3 93 CMPUBEQ R3, R1, xIsPosZero 94 95 MOVD $PosInf, R4 96 CMPUBEQ R4, R2, yIsPosInf 97 98 MOVD $NegInf, R4 99 CMPUBEQ R4, R2, yIsNegInf 100 BR Normal 101 xIsNegZero: 102 // special case Atan(-0, y>=0) = -0 103 MOVD $0, R4 104 CMPBLE R4, R2, returnX 105 106 //special case Atan2(-0, y<=-0) = -Pi 107 MOVD $NegZero, R4 108 CMPBGE R4, R2, returnNegPi 109 BR Normal 110 xIsPosZero: 111 //special case Atan2(0, 0) = 0 112 MOVD $0, R4 113 CMPUBEQ R4, R2, returnX 114 115 //special case Atan2(0, y<=-0) = Pi 116 MOVD $NegZero, R4 117 CMPBGE R4, R2, returnPi 118 BR Normal 119 yIsNegInf: 120 //special case Atan2(+Inf, -Inf) = 3Pi/4 121 MOVD $PosInf, R3 122 CMPUBEQ R3, R1, posInfNegInf 123 124 //special case Atan2(-Inf, -Inf) = -3Pi/4 125 MOVD $NegInf, R3 126 CMPUBEQ R3, R1, negInfNegInf 127 BR Normal 128 yIsPosInf: 129 //special case Atan2(+Inf, +Inf) = Pi/4 130 MOVD $PosInf, R3 131 CMPUBEQ R3, R1, posInfPosInf 132 133 //special case Atan2(-Inf, +Inf) = -Pi/4 134 MOVD $NegInf, R3 135 CMPUBEQ R3, R1, negInfPosInf 136 137 //special case Atan2(-Pi, +Inf) = Pi 138 MOVD $NegPi, R3 139 CMPUBEQ R3, R1, negPiPosInf 140 141 Normal: 142 FMOVD x+0(FP), F0 143 FMOVD y+8(FP), F2 144 MOVD $·atan2rodataL25<>+0(SB), R9 145 WORD $0xB3CD0020 //lgdr %r2,%f0 146 WORD $0xB3CD0012 //lgdr %r1,%f2 147 WORD $0xEC2220BF //risbgn %r2,%r2,64-32,128+63,64+0+32 148 BYTE $0x60 149 BYTE $0x59 150 WORD $0xEC1120BF //risbgn %r1,%r1,64-32,128+63,64+0+32 151 BYTE $0x60 152 BYTE $0x59 153 WORD $0xB9170032 //llgtr %r3,%r2 154 WORD $0xEC523FBF //risbg %r5,%r2,64-1,128+63,64+32+1 155 BYTE $0x61 156 BYTE $0x55 157 WORD $0xB9170041 //llgtr %r4,%r1 158 WFLCDB V0, V20 159 MOVW R4, R6 160 MOVW R3, R7 161 CMPUBLT R6, R7, L17 162 WFDDB V2, V0, V3 163 ADDW $2, R5, R2 164 MOVW R4, R6 165 MOVW R3, R7 166 CMPUBLE R6, R7, L20 167 L3: 168 WFMDB V3, V3, V4 169 VLEG $0, 152(R9), V18 170 VLEG $0, 144(R9), V16 171 FMOVD 136(R9), F1 172 FMOVD 128(R9), F5 173 FMOVD 120(R9), F6 174 WFMADB V4, V16, V5, V16 175 WFMADB V4, V6, V1, V6 176 FMOVD 112(R9), F7 177 WFMDB V4, V4, V1 178 WFMADB V4, V7, V18, V7 179 VLEG $0, 104(R9), V18 180 WFMADB V1, V6, V16, V6 181 CMPWU R4, R3 182 FMOVD 96(R9), F5 183 VLEG $0, 88(R9), V16 184 WFMADB V4, V5, V18, V5 185 VLEG $0, 80(R9), V18 186 VLEG $0, 72(R9), V22 187 WFMADB V4, V16, V18, V16 188 VLEG $0, 64(R9), V18 189 WFMADB V1, V7, V5, V7 190 WFMADB V4, V18, V22, V18 191 WFMDB V1, V1, V5 192 WFMADB V1, V16, V18, V16 193 VLEG $0, 56(R9), V18 194 WFMADB V5, V6, V7, V6 195 VLEG $0, 48(R9), V22 196 FMOVD 40(R9), F7 197 WFMADB V4, V7, V18, V7 198 VLEG $0, 32(R9), V18 199 WFMADB V5, V6, V16, V6 200 WFMADB V4, V18, V22, V18 201 VLEG $0, 24(R9), V16 202 WFMADB V1, V7, V18, V7 203 VLEG $0, 16(R9), V18 204 VLEG $0, 8(R9), V22 205 WFMADB V4, V18, V16, V18 206 VLEG $0, 0(R9), V16 207 WFMADB V5, V6, V7, V6 208 WFMADB V4, V16, V22, V16 209 FMUL F3, F4 210 WFMADB V1, V18, V16, V1 211 FMADD F6, F5, F1 212 WFMADB V4, V1, V3, V4 213 BLT L18 214 BGT L7 215 WORD $0xB3120022 //ltdbr %f2,%f2 216 BLTU L21 217 L8: 218 WORD $0xB3120000 //ltdbr %f0,%f0 219 BLTU L22 220 L9: 221 WFCHDBS V2, V0, V0 222 BNE L18 223 L7: 224 MOVW R1, R6 225 CMPBGE R6, $0, L1 226 L18: 227 WORD $0xEC223ABC //risbg %r2,%r2,58,128+60,3 228 BYTE $0x03 229 BYTE $0x55 230 MOVD $·atan2xpi2h<>+0(SB), R1 231 MOVD ·atan2xpim<>+0(SB), R3 232 WORD $0xB3C10003 //ldgr %f0,%r3 233 WORD $0xED021000 //madb %f4,%f0,0(%r2,%r1) 234 BYTE $0x40 235 BYTE $0x1E 236 L1: 237 FMOVD F4, ret+16(FP) 238 RET 239 240 L20: 241 WORD $0xB3120022 //ltdbr %f2,%f2 242 BLTU L23 243 FMOVD F2, F6 244 L4: 245 WORD $0xB3120000 //ltdbr %f0,%f0 246 BLTU L24 247 FMOVD F0, F4 248 L5: 249 WFCHDBS V6, V4, V4 250 BEQ L3 251 L17: 252 WFDDB V0, V2, V4 253 BYTE $0x18 //lr %r2,%r5 254 BYTE $0x25 255 WORD $0xB3130034 //lcdbr %f3,%f4 256 BR L3 257 L23: 258 WORD $0xB3130062 //lcdbr %f6,%f2 259 BR L4 260 L22: 261 VLR V20, V0 262 BR L9 263 L21: 264 WORD $0xB3130022 //lcdbr %f2,%f2 265 BR L8 266 L24: 267 VLR V20, V4 268 BR L5 269 returnX: //the result is same as the first argument 270 MOVD R1, ret+16(FP) 271 RET 272 returnY: //the result is same as the second argument 273 MOVD R2, ret+16(FP) 274 RET 275 returnPi: 276 MOVD $Pi, R1 277 MOVD R1, ret+16(FP) 278 RET 279 returnNegPi: 280 MOVD $NegPi, R1 281 MOVD R1, ret+16(FP) 282 RET 283 posInfNegInf: 284 MOVD $Pi3Div4, R1 285 MOVD R1, ret+16(FP) 286 RET 287 negInfNegInf: 288 MOVD $NegPi3Div4, R1 289 MOVD R1, ret+16(FP) 290 RET 291 posInfPosInf: 292 MOVD $PiDiv4, R1 293 MOVD R1, ret+16(FP) 294 RET 295 negInfPosInf: 296 MOVD $NegPiDiv4, R1 297 MOVD R1, ret+16(FP) 298 RET 299 negPiPosInf: 300 MOVD $NegZero, R1 301 MOVD R1, ret+16(FP) 302 RET