// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// float64 bit patterns used as 64-bit immediates by atan2Asm, both for
// comparing against the raw argument bits and for storing fixed results.
#define PosInf		0x7FF0000000000000
#define NegInf		0xFFF0000000000000
#define NegZero		0x8000000000000000
#define Pi		0x400921FB54442D18
#define NegPi		0xC00921FB54442D18
#define Pi3Div4		0x4002D97C7F3321D2	// 3Pi/4
#define NegPi3Div4	0xC002D97C7F3321D2	// -3Pi/4
#define PiDiv4		0x3FE921FB54442D18	// Pi/4
#define NegPiDiv4	0xBFE921FB54442D18	// -Pi/4

// Minimax polynomial coefficients and other constants.
// atan2Asm loads the base address of this table into R9 and reads the
// entries by fixed byte offset (0 through 152), so neither the order nor
// the offsets of the entries below may change.
DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00
DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00
DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00
DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00
DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01
DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01
DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160

// Four 8-byte entries selected by a 0..3 index at label L18 of atan2Asm.
// Entries 0 and 1 are the same value with opposite signs; entries 2 and 3
// are exactly twice entries 0 and 1 (same mantissa, exponent one higher).
DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b
DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b
DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b
DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b
GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32
// Multiplier applied to the selected atan2xpi2h entry before it is added
// to the polynomial result.
// NOTE(review): numerically atan2xpim * atan2xpi2h[i] looks like +-Pi/2
// for i = 0, 1 and +-Pi for i = 2, 3 — confirm before relying on it.
DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000
GLOBL ·atan2xpim<> + 0(SB), RODATA, $8

// Atan2 returns the arc tangent of y/x, using
// the signs of the two to determine the quadrant
// of the return value.
//
// Special cases are (in order):
//	Atan2(y, NaN) = NaN
//	Atan2(NaN, x) = NaN
//	Atan2(+0, x>=0) = +0
//	Atan2(-0, x>=0) = -0
//	Atan2(+0, x<=-0) = +Pi
//	Atan2(-0, x<=-0) = -Pi
//	Atan2(y>0, 0) = +Pi/2
//	Atan2(y<0, 0) = -Pi/2
//	Atan2(+Inf, +Inf) = +Pi/4
//	Atan2(-Inf, +Inf) = -Pi/4
//	Atan2(+Inf, -Inf) = 3Pi/4
//	Atan2(-Inf, -Inf) = -3Pi/4
//	Atan2(y, +Inf) = 0
//	Atan2(y>0, -Inf) = +Pi
//	Atan2(y<0, -Inf) = -Pi
//	Atan2(+Inf, x) = +Pi/2
//	Atan2(-Inf, x) = -Pi/2
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
// atan2Asm implementation notes:
//   - Frame is $0-24: no local frame; the first argument is read as
//     x+0(FP), the second as y+8(FP), and the result is written to
//     ret+16(FP).
//   - The inline special-case comments name the arguments in frame order
//     (x first, y second).
//     NOTE(review): presumably the asm "x" is the y of the Go-level
//     Atan2(y, x) and the asm "y" is its x — confirm against the Go
//     declaration.
//   - Special cases are decided on the raw 64-bit patterns in R1 (x) and
//     R2 (y); everything else goes through the polynomial at Normal.
TEXT	·atan2Asm(SB), NOSPLIT, $0-24
	// Load the raw bit patterns of both arguments for the special-case checks.
	MOVD	x+0(FP), R1
	MOVD	y+8(FP), R2

	// special case Atan2(NaN, y) = NaN
	MOVD	$~(1<<63), R5
	AND	R1, R5		// R5 = bits of x with the sign bit cleared
	MOVD	$PosInf, R3
	CMPUBLT	R3, R5, returnX	// magnitude bits above +Inf: x is NaN, return it

	// special case Atan2(x, NaN) = NaN
	MOVD	$~(1<<63), R5
	AND	R2, R5		// R5 = bits of y with the sign bit cleared
	CMPUBLT	R3, R5, returnY	// R3 still holds PosInf; y is NaN, return it

	MOVD	$NegZero, R3
	CMPUBEQ	R3, R1, xIsNegZero

	MOVD	$0, R3
	CMPUBEQ	R3, R1, xIsPosZero

	MOVD	$PosInf, R4
	CMPUBEQ	R4, R2, yIsPosInf

	MOVD	$NegInf, R4
	CMPUBEQ	R4, R2, yIsNegInf
	BR	Normal
xIsNegZero:
	// special case Atan2(-0, y>=0) = -0
	// Signed compare 0 <= R2, i.e. the sign bit of y is clear; returnX
	// hands back x, which is -0 here.
	MOVD	$0, R4
	CMPBLE	R4, R2, returnX

	// special case Atan2(-0, y<=-0) = -Pi
	// NegZero is the most negative int64, so this signed compare only
	// branches when y is exactly -0; any other negative y falls through
	// to Normal.
	MOVD	$NegZero, R4
	CMPBGE	R4, R2, returnNegPi
	BR	Normal
xIsPosZero:
	// special case Atan2(0, 0) = 0
	MOVD	$0, R4
	CMPUBEQ	R4, R2, returnX

	// special case Atan2(0, y<=-0) = Pi
	// As above, the signed compare against NegZero only matches y == -0;
	// every other y falls through to Normal.
	MOVD	$NegZero, R4
	CMPBGE	R4, R2, returnPi
	BR	Normal
yIsNegInf:
	// special case Atan2(+Inf, -Inf) = 3Pi/4
	MOVD	$PosInf, R3
	CMPUBEQ	R3, R1, posInfNegInf

	// special case Atan2(-Inf, -Inf) = -3Pi/4
	MOVD	$NegInf, R3
	CMPUBEQ	R3, R1, negInfNegInf
	BR	Normal
yIsPosInf:
	// special case Atan2(+Inf, +Inf) = Pi/4
	MOVD	$PosInf, R3
	CMPUBEQ	R3, R1, posInfPosInf

	// special case Atan2(-Inf, +Inf) = -Pi/4
	MOVD	$NegInf, R3
	CMPUBEQ	R3, R1, negInfPosInf

	// special case Atan2(x, +Inf) = Copysign(0, x)
	// Signed R1 < 0 means the sign bit of x is set.
	CMPBLT	R1, $0, returnNegZero
	BR returnPosZero

Normal:
	// General path: reload both arguments as floating point values.
	FMOVD	x+0(FP), F0
	FMOVD	y+8(FP), F2
	MOVD	$·atan2rodataL25<>+0(SB), R9	// R9 = base of the coefficient table
	LGDR	F0, R2				// R2 = bits of x
	LGDR	F2, R1				// R1 = bits of y
	RISBGNZ	$32, $63, $32, R2, R2		// R2 = high 32 bits of x
	RISBGNZ	$32, $63, $32, R1, R1		// R1 = high 32 bits of y
	WORD	$0xB9170032	// llgtr %r3,%r2: R3 = R2 with the sign bit dropped
	RISBGZ	$63, $63, $33, R2, R5		// R5 = sign bit of x (0 or 1)
	WORD	$0xB9170041	// llgtr %r4,%r1: R4 = R1 with the sign bit dropped
	WFLCDB	V0, V20				// V20 = -x, copied at L22/L24 when x is negative
	MOVW	R4, R6
	MOVW	R3, R7
	CMPUBLT	R6, R7, L17			// high word of |y| < high word of |x|
	WFDDB	V2, V0, V3			// V3 = quotient of the two arguments
	ADDW	$2, R5, R2			// R2 = sign bit of x + 2 (index used at L18)
	MOVW	R4, R6
	MOVW	R3, R7
	CMPUBLE	R6, R7, L20			// high words equal: compare full magnitudes at L20
L3:
	// Polynomial evaluation on V3: V4 = V3*V3, followed by a chain of
	// multiply-adds over the table entries at 0..152(R9). The result is
	// left in F4.
	WFMDB	V3, V3, V4
	VLEG	$0, 152(R9), V18
	VLEG	$0, 144(R9), V16
	FMOVD	136(R9), F1
	FMOVD	128(R9), F5
	FMOVD	120(R9), F6
	WFMADB	V4, V16, V5, V16
	WFMADB	V4, V6, V1, V6
	FMOVD	112(R9), F7
	WFMDB	V4, V4, V1
	WFMADB	V4, V7, V18, V7
	VLEG	$0, 104(R9), V18
	WFMADB	V1, V6, V16, V6
	// NOTE(review): this compare of the two magnitude high words appears
	// to set the condition code consumed by BLT L18 / BGT L7 after the
	// polynomial — confirm that none of the intervening instructions
	// change the condition code.
	CMPWU	R4, R3
	FMOVD	96(R9), F5
	VLEG	$0, 88(R9), V16
	WFMADB	V4, V5, V18, V5
	VLEG	$0, 80(R9), V18
	VLEG	$0, 72(R9), V22
	WFMADB	V4, V16, V18, V16
	VLEG	$0, 64(R9), V18
	WFMADB	V1, V7, V5, V7
	WFMADB	V4, V18, V22, V18
	WFMDB	V1, V1, V5
	WFMADB	V1, V16, V18, V16
	VLEG	$0, 56(R9), V18
	WFMADB	V5, V6, V7, V6
	VLEG	$0, 48(R9), V22
	FMOVD	40(R9), F7
	WFMADB	V4, V7, V18, V7
	VLEG	$0, 32(R9), V18
	WFMADB	V5, V6, V16, V6
	WFMADB	V4, V18, V22, V18
	VLEG	$0, 24(R9), V16
	WFMADB	V1, V7, V18, V7
	VLEG	$0, 16(R9), V18
	VLEG	$0, 8(R9), V22
	WFMADB	V4, V18, V16, V18
	VLEG	$0, 0(R9), V16
	WFMADB	V5, V6, V7, V6
	WFMADB	V4, V16, V22, V16
	FMUL	F3, F4
	WFMADB	V1, V18, V16, V1
	FMADD	F6, F5, F1
	WFMADB	V4, V1, V3, V4
	BLT	L18
	BGT	L7
	// Neither less nor greater: replace F2 and F0 by their negations when
	// they test negative (L21, L22), then compare the two values.
	LTDBR	F2, F2
	BLTU	L21
L8:
	LTDBR	F0, F0
	BLTU	L22
L9:
	WFCHDBS	V2, V0, V0
	BNE	L18
L7:
	MOVW	R1, R6
	CMPBGE	R6, $0, L1		// sign bit of y clear: return F4 unchanged
L18:
	// Add atan2xpim * atan2xpi2h[index] to F4, with the index taken from
	// the low three bits of R2.
	RISBGZ	$58, $60, $3, R2, R2	// R2 = (R2 & 7) * 8, byte offset into atan2xpi2h
	MOVD	$·atan2xpi2h<>+0(SB), R1
	MOVD	·atan2xpim<>+0(SB), R3
	LDGR	R3, F0			// F0 = atan2xpim
	WORD	$0xED021000	// madb %f4,%f0,0(%r2,%r1): F4 += F0 * 8 bytes at (R1+R2)
	BYTE	$0x40
	BYTE	$0x1E
L1:
	FMOVD	F4, ret+16(FP)
	RET

L20:
	// High words of the magnitudes are equal: build the absolute values
	// of y (F6) and x (F4) and compare them in full.
	LTDBR	F2, F2
	BLTU	L23
	FMOVD	F2, F6
L4:
	LTDBR	F0, F0
	BLTU	L24
	FMOVD	F0, F4
L5:
	WFCHDBS	V6, V4, V4
	BEQ	L3			// keep the quotient already in V3
	// otherwise fall through and use the swapped quotient
L17:
	// Swapped quotient: divide with the operands exchanged, select the
	// low index pair (R2 = sign bit of x), and negate the quotient into F3.
	WFDDB	V0, V2, V4
	BYTE	$0x18		// lr %r2,%r5
	BYTE	$0x25
	WORD	$0xB3130034	// lcdbr %f3,%f4
	BR	L3
L23:
	WORD	$0xB3130062	// lcdbr %f6,%f2: F6 = -y
	BR	L4
L22:
	VLR	V20, V0		// V0 = -x (V20 was set at Normal)
	BR	L9
L21:
	WORD	$0xB3130022	// lcdbr %f2,%f2: F2 = -F2
	BR	L8
L24:
	VLR	V20, V4		// V4 = -x (V20 was set at Normal)
	BR	L5
returnX:	// the result is the same as the first argument
	MOVD	R1, ret+16(FP)
	RET
returnY:	// the result is the same as the second argument
	MOVD	R2, ret+16(FP)
	RET
returnPi:
	MOVD	$Pi, R1
	MOVD	R1, ret+16(FP)
	RET
returnNegPi:
	MOVD	$NegPi, R1
	MOVD	R1, ret+16(FP)
	RET
posInfNegInf:
	MOVD	$Pi3Div4, R1
	MOVD	R1, ret+16(FP)
	RET
negInfNegInf:
	MOVD	$NegPi3Div4, R1
	MOVD	R1, ret+16(FP)
	RET
posInfPosInf:
	MOVD	$PiDiv4, R1
	MOVD	R1, ret+16(FP)
	RET
negInfPosInf:
	MOVD	$NegPiDiv4, R1
	MOVD	R1, ret+16(FP)
	RET
returnNegZero:
	MOVD	$NegZero, R1
	MOVD	R1, ret+16(FP)
	RET
returnPosZero:
	MOVD	$0, ret+16(FP)
	RET