github.com/likebike/go--@v0.0.0-20190911215757-0bd925d16e96/go/src/math/atan2_s390x.s (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  #define PosInf		0x7FF0000000000000	// float64 bit pattern of +Inf
     8  #define NegInf		0xFFF0000000000000	// float64 bit pattern of -Inf
     9  #define NegZero		0x8000000000000000	// float64 bit pattern of -0 (only the sign bit set)
    10  #define Pi		0x400921FB54442D18	// float64 bit pattern of Pi
    11  #define NegPi		0xC00921FB54442D18	// float64 bit pattern of -Pi
    12  #define Pi3Div4		0x4002D97C7F3321D2	// 3Pi/4
    13  #define NegPi3Div4	0xC002D97C7F3321D2	// -3Pi/4
    14  #define PiDiv4		0x3FE921FB54442D18	// Pi/4
    15  #define NegPiDiv4	0xBFE921FB54442D18	// -Pi/4
    16  
    17  // Minimax polynomial coefficients: 20 float64 values that atan2Asm reads by byte offset from R9.
    18  DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00	// ~ 1/5
    19  DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00	// ~ -1/3
    20  DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00	// ~ 1/9
    21  DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00	// ~ -1/7
    22  DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01	// ~ 1/13
    23  DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
    24  DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01	// ~ -1/11
    25  DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
    26  DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
    27  DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
    28  DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
    29  DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
    30  DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
    31  DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
    32  DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
    33  DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
    34  DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
    35  DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
    36  DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
    37  DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
    38  GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160	// read-only, 160 bytes = 20 entries of 8 bytes
    39  
    40  DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b	// entry 0: multiplied by atan2xpim gives ~ +Pi/2
    41  DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b	// entry 1: multiplied by atan2xpim gives ~ -Pi/2
    42  DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b	// entry 2: multiplied by atan2xpim gives ~ +Pi
    43  DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b	// entry 3: multiplied by atan2xpim gives ~ -Pi
    44  GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32	// read-only, 4 entries of 8 bytes, indexed by R2 at L18 in atan2Asm
    45  DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000	// scalar multiplier (~ 1.3096) applied to the selected atan2xpi2h entry
    46  GLOBL ·atan2xpim<> + 0(SB), RODATA, $8	// read-only, one 8-byte value
    47  
    48  // Atan2 returns the arc tangent of y/x, using
    49  // the signs of the two to determine the quadrant
    50  // of the return value.
    51  //
    52  // Special cases are (in order):
    53  //      Atan2(y, NaN) = NaN
    54  //      Atan2(NaN, x) = NaN
    55  //      Atan2(+0, x>=0) = +0
    56  //      Atan2(-0, x>=0) = -0
    57  //      Atan2(+0, x<=-0) = +Pi
    58  //      Atan2(-0, x<=-0) = -Pi
    59  //      Atan2(y>0, 0) = +Pi/2
    60  //      Atan2(y<0, 0) = -Pi/2
    61  //      Atan2(+Inf, +Inf) = +Pi/4
    62  //      Atan2(-Inf, +Inf) = -Pi/4
    63  //      Atan2(+Inf, -Inf) = 3Pi/4
    64  //      Atan2(-Inf, -Inf) = -3Pi/4
    65  //      Atan2(y, +Inf) = 0
    66  //      Atan2(y>0, -Inf) = +Pi
    67  //      Atan2(y<0, -Inf) = -Pi
    68  //      Atan2(+Inf, x) = +Pi/2
    69  //      Atan2(-Inf, x) = -Pi/2
    70  // The algorithm used is minimax polynomial approximation
    71  // with coefficients determined with a Remez exchange algorithm.
    72  
    73  TEXT	·atan2Asm(SB), NOSPLIT, $0-24	// no local frame; 24 bytes of arguments and result (inputs at 0 and 8, result at 16)
    74  	// Load both arguments as raw 64-bit patterns so the special cases can be screened with integer compares. In this file the first argument is named x and the second y.
    75  	MOVD	x+0(FP), R1	// R1 = bits of the first argument (x)
    76  	MOVD	y+8(FP), R2	// R2 = bits of the second argument (y)
    77  
    78  	// special case Atan2(NaN, y) = NaN
    79  	MOVD	$~(1<<63), R5	// mask with every bit set except the sign bit
    80  	AND	R1, R5		// R5 = bits of |x|; R1 itself is left unchanged
    81  	MOVD	$PosInf, R3
    82  	CMPUBLT	R3, R5, returnX	// unsigned: +Inf bits < |x| bits, so x is NaN and is returned as-is
    83  
    84  	// special case Atan2(x, NaN) = NaN
    85  	MOVD	$~(1<<63), R5
    86  	AND	R2, R5	// R5 = bits of |y|
    87  	CMPUBLT R3, R5, returnY	// R3 still holds +Inf; y is NaN and is returned as-is
    88  
    89  	MOVD	$NegZero, R3
    90  	CMPUBEQ	R3, R1, xIsNegZero	// x is exactly -0
    91  
    92  	MOVD	$0, R3
    93  	CMPUBEQ	R3, R1, xIsPosZero	// x is exactly +0
    94  
    95  	MOVD	$PosInf, R4
    96  	CMPUBEQ	R4, R2, yIsPosInf	// y is exactly +Inf
    97  
    98  	MOVD	$NegInf, R4
    99  	CMPUBEQ	R4, R2, yIsNegInf	// y is exactly -Inf
   100  	BR	Normal	// none of the screened bit patterns matched
   101  xIsNegZero:
   102  	// special case Atan2(-0, y>=0) = -0
   103  	MOVD	$0, R4
   104  	CMPBLE	R4, R2, returnX	// signed 0 <= y bits, i.e. the sign bit of y is clear; returnX stores x, which is -0 here
   105  
   106  	//special case Atan2(-0, y<=-0) = -Pi
   107  	MOVD	$NegZero, R4
   108  	CMPBGE	R4, R2, returnNegPi	// signed compare against the most negative 64-bit value: taken only when y is exactly -0
   109  	BR	Normal	// any other negative y goes through the general path
   110  xIsPosZero:
   111  	//special case Atan2(0, 0) = 0
   112  	MOVD	$0, R4
   113  	CMPUBEQ	R4, R2, returnX	// y is exactly +0; returnX stores x, which is +0 here
   114  
   115  	//special case Atan2(0, y<=-0) = Pi
   116  	MOVD	$NegZero, R4
   117  	CMPBGE	R4, R2, returnPi	// signed compare against the most negative 64-bit value: taken only when y is exactly -0
   118  	BR Normal	// every other y goes through the general path
   119  yIsNegInf:
   120  	//special case Atan2(+Inf, -Inf) = 3Pi/4
   121  	MOVD	$PosInf, R3
   122  	CMPUBEQ	R3, R1, posInfNegInf	// x is exactly +Inf
   123  
   124  	//special case Atan2(-Inf, -Inf) = -3Pi/4
   125  	MOVD	$NegInf, R3
   126  	CMPUBEQ	R3, R1, negInfNegInf	// x is exactly -Inf
   127  	BR Normal	// finite x with y = -Inf goes through the general path
   128  yIsPosInf:
   129  	//special case Atan2(+Inf, +Inf) = Pi/4
   130  	MOVD	$PosInf, R3
   131  	CMPUBEQ	R3, R1, posInfPosInf	// x is exactly +Inf
   132  
   133  	//special case Atan2(-Inf, +Inf) = -Pi/4
   134  	MOVD	$NegInf, R3
   135  	CMPUBEQ	R3, R1, negInfPosInf	// x is exactly -Inf
   136  
   137  	//special case Atan2(-Pi, +Inf) = -0
   138  	MOVD	$NegPi, R3
   139  	CMPUBEQ	R3, R1, negPiPosInf	// x has exactly the bit pattern of -Pi; negPiPosInf stores -0. Otherwise fall through to Normal
   140  
   141  Normal:
   142  	FMOVD	x+0(FP), F0	// F0 = x
   143  	FMOVD	y+8(FP), F2	// F2 = y
   144  	MOVD	$·atan2rodataL25<>+0(SB), R9	// R9 = base address of the polynomial coefficient table
   145  	WORD	$0xB3CD0020	//lgdr	%r2,%f0 : R2 = bits of x (note R1/R2 roles differ from the special-case code above)
   146  	WORD	$0xB3CD0012	//lgdr	%r1,%f2 : R1 = bits of y
   147  	WORD	$0xEC2220BF	//risbgn	%r2,%r2,64-32,128+63,64+0+32 : rotate by 32, keep bits 32-63, zero the rest; R2 = high word of x
   148  	BYTE	$0x60	// the two BYTE directives complete the 6-byte instruction started by the WORD above
   149  	BYTE	$0x59
   150  	WORD	$0xEC1120BF	//risbgn	%r1,%r1,64-32,128+63,64+0+32 : same extraction; R1 = high word of y
   151  	BYTE	$0x60
   152  	BYTE	$0x59
   153  	WORD	$0xB9170032	//llgtr	%r3,%r2 : R3 = low 31 bits of R2, i.e. high word of |x|
   154  	WORD	$0xEC523FBF	//risbg	%r5,%r2,64-1,128+63,64+32+1 : rotate by 33, keep only bit 63; R5 = sign bit of x (0 or 1)
   155  	BYTE	$0x61
   156  	BYTE	$0x55
   157  	WORD	$0xB9170041	//llgtr	%r4,%r1 : R4 = low 31 bits of R1, i.e. high word of |y|
   158  	WFLCDB	V0, V20	// V20 = -x, used at L22 and L24 to form |x| when x is negative
   159  	MOVW	R4, R6
   160  	MOVW	R3, R7
   161  	CMPUBLT	R6, R7, L17	// high word of |y| < high word of |x|: switch to the y/x form at L17
   162  	WFDDB	V2, V0, V3	// V3 = t, the ratio x/y; NOTE(review): operand order inferred from the magnitude test above, confirm
   163  	ADDW	$2, R5, R2	// R2 = 2 + sign bit of x: selects entry 2 or 3 of atan2xpi2h (the +Pi / -Pi pair) at L18
   164  	MOVW	R4, R6
   165  	MOVW	R3, R7
   166  	CMPUBLE	R6, R7, L20	// the "<" case already branched, so this is taken when the high words are equal: compare full magnitudes at L20
   167  L3:
   168  	WFMDB	V3, V3, V4	// V4 = t*t
   169  	VLEG	$0, 152(R9), V18	// VLEG/FMOVD below load table entries by byte offset from R9
   170  	VLEG	$0, 144(R9), V16
   171  	FMOVD	136(R9), F1
   172  	FMOVD	128(R9), F5
   173  	FMOVD	120(R9), F6
   174  	WFMADB	V4, V16, V5, V16	// V16 = V4*V16 + V5; each WFMADB a, b, c, d below computes d = a*b + c
   175  	WFMADB	V4, V6, V1, V6
   176  	FMOVD	112(R9), F7
   177  	WFMDB	V4, V4, V1	// V1 = t^4
   178  	WFMADB	V4, V7, V18, V7
   179  	VLEG	$0, 104(R9), V18
   180  	WFMADB	V1, V6, V16, V6
   181  	CMPWU	R4, R3	// compare high words of |y| and |x|; the condition code is consumed by the BLT/BGT after the polynomial
   182  	FMOVD	96(R9), F5
   183  	VLEG	$0, 88(R9), V16
   184  	WFMADB	V4, V5, V18, V5
   185  	VLEG	$0, 80(R9), V18
   186  	VLEG	$0, 72(R9), V22
   187  	WFMADB	V4, V16, V18, V16
   188  	VLEG	$0, 64(R9), V18
   189  	WFMADB	V1, V7, V5, V7
   190  	WFMADB	V4, V18, V22, V18
   191  	WFMDB	V1, V1, V5	// V5 = t^8
   192  	WFMADB	V1, V16, V18, V16
   193  	VLEG	$0, 56(R9), V18
   194  	WFMADB	V5, V6, V7, V6
   195  	VLEG	$0, 48(R9), V22
   196  	FMOVD	40(R9), F7
   197  	WFMADB	V4, V7, V18, V7
   198  	VLEG	$0, 32(R9), V18
   199  	WFMADB	V5, V6, V16, V6
   200  	WFMADB	V4, V18, V22, V18
   201  	VLEG	$0, 24(R9), V16
   202  	WFMADB	V1, V7, V18, V7
   203  	VLEG	$0, 16(R9), V18
   204  	VLEG	$0, 8(R9), V22
   205  	WFMADB	V4, V18, V16, V18
   206  	VLEG	$0, 0(R9), V16
   207  	WFMADB	V5, V6, V7, V6
   208  	WFMADB	V4, V16, V22, V16
   209  	FMUL	F3, F4	// F4 = t^2 * t = t^3
   210  	WFMADB	V1, V18, V16, V1
   211  	FMADD	F6, F5, F1	// F1 += F6*F5
   212  	WFMADB	V4, V1, V3, V4	// V4 = t^3*V1 + t: the polynomial value before any quadrant correction
   213  	BLT	L18	// high word of |y| below that of |x| (the L17 form was used): always add the correction
   214  	BGT	L7	// high word of |y| above that of |x|: the correction depends only on the sign of y
   215  	WORD	$0xB3120022	//ltdbr	%f2,%f2 : high words equal; test the sign of y
   216  	BLTU	L21	// y negative: L21 replaces F2 with -F2
   217  L8:
   218  	WORD	$0xB3120000	//ltdbr	%f0,%f0 : test the sign of x
   219  	BLTU	L22	// x negative: L22 replaces V0 with V20 (= -x)
   220  L9:
   221  	WFCHDBS	V2, V0, V0	// full-precision compare of |y| (V2) with |x| (V0); NOTE(review): believed to test |y| > |x|, confirm operand order
   222  	BNE	L18	// compare did not hold: add the correction
   223  L7:
   224  	MOVW	R1, R6	// sign-extend the high word of y so its sign bit becomes the sign of R6
   225  	CMPBGE	R6, $0, L1	// y is not negative: return the polynomial value unchanged
   226  L18:
   227  	WORD	$0xEC223ABC	//risbg	%r2,%r2,58,128+60,3 : rotate by 3, keep bits 58-60; R2 = (R2 & 7) * 8, a byte offset into atan2xpi2h
   228  	BYTE	$0x03
   229  	BYTE	$0x55
   230  	MOVD	$·atan2xpi2h<>+0(SB), R1	// R1 = base address of the correction table
   231  	MOVD	·atan2xpim<>+0(SB), R3	// R3 = bits of the scalar multiplier
   232  	WORD	$0xB3C10003	//ldgr	%f0,%r3 : F0 = multiplier
   233  	WORD	$0xED021000	//madb	%f4,%f0,0(%r2,%r1) : F4 += F0 * (table entry at R1+R2), i.e. add about +-Pi/2 or +-Pi
   234  	BYTE	$0x40
   235  	BYTE	$0x1E
   236  L1:
   237  	FMOVD	F4, ret+16(FP)	// store the result
   238  	RET
   239  
   240  L20:
   241  	WORD	$0xB3120022	//ltdbr	%f2,%f2 : test the sign of y
   242  	BLTU	L23	// y negative: L23 sets F6 = -y
   243  	FMOVD	F2, F6	// F6 = |y|
   244  L4:
   245  	WORD	$0xB3120000	//ltdbr	%f0,%f0 : test the sign of x
   246  	BLTU	L24	// x negative: L24 sets V4 = V20 (= -x)
   247  	FMOVD	F0, F4	// F4 = |x|
   248  L5:
   249  	WFCHDBS	V6, V4, V4	// full-precision compare of |y| (V6) with |x| (V4); NOTE(review): believed to test |y| > |x|, confirm operand order
   250  	BEQ	L3	// compare held: keep t = V3 and the index already in R2
   251  L17:
   252  	WFDDB	V0, V2, V4	// V4 = y/x; NOTE(review): operand order inferred from the magnitude test, confirm
   253  	BYTE	$0x18	//lr	%r2,%r5 : 2-byte instruction; R2 = sign bit of x, selecting entry 0 or 1 of atan2xpi2h (the +Pi/2 / -Pi/2 pair)
   254  	BYTE	$0x25
   255  	WORD	$0xB3130034	//lcdbr	%f3,%f4 : F3 = -F4, so t = -(y/x)
   256  	BR	L3
   257  L23:
   258  	WORD	$0xB3130062	//lcdbr	%f6,%f2 : F6 = -y = |y|
   259  	BR	L4
   260  L22:
   261  	VLR	V20, V0	// V0 = V20 = -x = |x|
   262  	BR	L9
   263  L21:
   264  	WORD	$0xB3130022	//lcdbr	%f2,%f2 : F2 = -y = |y|
   265  	BR	L8
   266  L24:
   267  	VLR	V20, V4	// V4 = V20 = -x = |x|
   268  	BR	L5
   269  returnX:	//the result is the first argument, unchanged (R1 holds its bits)
   270  	MOVD	R1, ret+16(FP)
   271  	RET
   272  returnY:	//the result is the second argument, unchanged (R2 holds its bits)
   273  	MOVD	R2, ret+16(FP)
   274  	RET
   275  returnPi:
   276  	MOVD	$Pi, R1	// result = +Pi
   277  	MOVD	R1, ret+16(FP)
   278  	RET
   279  returnNegPi:
   280  	MOVD	$NegPi, R1	// result = -Pi
   281  	MOVD	R1, ret+16(FP)
   282  	RET
   283  posInfNegInf:
   284  	MOVD	$Pi3Div4, R1	// result = 3Pi/4
   285  	MOVD	R1, ret+16(FP)
   286  	RET
   287  negInfNegInf:
   288  	MOVD	$NegPi3Div4, R1	// result = -3Pi/4
   289  	MOVD	R1, ret+16(FP)
   290  	RET
   291  posInfPosInf:
   292  	MOVD	$PiDiv4, R1	// result = Pi/4
   293  	MOVD	R1, ret+16(FP)
   294  	RET
   295  negInfPosInf:
   296  	MOVD	$NegPiDiv4, R1	// result = -Pi/4
   297  	MOVD	R1, ret+16(FP)
   298  	RET
   299  negPiPosInf:
   300  	MOVD	$NegZero, R1	// result = -0
   301  	MOVD	R1, ret+16(FP)
   302  	RET