github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/math/erf_s390x.s (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // Minimax polynomial coefficients and other constants; erfAsm addresses them as byte offsets from R5.
     8  DATA ·erfrodataL13<> + 0(SB)/8, $0.243673229298474689E+01	// offsets 0-32: coefficients read on the L8 path
     9  DATA ·erfrodataL13<> + 8(SB)/8, $-.654905018503145600E+00
    10  DATA ·erfrodataL13<> + 16(SB)/8, $0.404669310217538718E+01	// read via the raw-encoded ADB (displacement 0x010) on L8
    11  DATA ·erfrodataL13<> + 24(SB)/8, $-.564189219162765367E+00
    12  DATA ·erfrodataL13<> + 32(SB)/8, $-.200104300906596851E+01
    13  DATA ·erfrodataL13<> + 40(SB)/8, $0.5	// multiplied by x and added to the result at the end of the L5 path
    14  DATA ·erfrodataL13<> + 48(SB)/8, $0.144070097650207154E+00	// offsets 48-168: polynomial coefficients read on the L5 path
    15  DATA ·erfrodataL13<> + 56(SB)/8, $-.116697735205906191E+00
    16  DATA ·erfrodataL13<> + 64(SB)/8, $0.256847684882319665E-01
    17  DATA ·erfrodataL13<> + 72(SB)/8, $-.510805169106229148E-02
    18  DATA ·erfrodataL13<> + 80(SB)/8, $0.885258164825590267E-03
    19  DATA ·erfrodataL13<> + 88(SB)/8, $-.133861989591931411E-03
    20  DATA ·erfrodataL13<> + 96(SB)/8, $0.178294867340272534E-04
    21  DATA ·erfrodataL13<> + 104(SB)/8, $-.211436095674019218E-05
    22  DATA ·erfrodataL13<> + 112(SB)/8, $0.225503753499344434E-06
    23  DATA ·erfrodataL13<> + 120(SB)/8, $-.218247939190783624E-07
    24  DATA ·erfrodataL13<> + 128(SB)/8, $0.193179206264594029E-08
    25  DATA ·erfrodataL13<> + 136(SB)/8, $-.157440643541715319E-09
    26  DATA ·erfrodataL13<> + 144(SB)/8, $0.118878583237342616E-10
    27  DATA ·erfrodataL13<> + 152(SB)/8, $0.554289288424588473E-13
    28  DATA ·erfrodataL13<> + 160(SB)/8, $-.277649758489502214E-14
    29  DATA ·erfrodataL13<> + 168(SB)/8, $-.839318416990049443E-12
    30  DATA ·erfrodataL13<> + 176(SB)/8, $-2.25	// added to x*x at the start of the L5 path
    31  DATA ·erfrodataL13<> + 184(SB)/8, $.12837916709551258632	// L12 path returns x + x*this, i.e. x*1.1283791670955126 (approx. 2/sqrt(pi))
    32  DATA ·erfrodataL13<> + 192(SB)/8, $1.0	// offsets 192-264: constants read on the shared L9 tail
    33  DATA ·erfrodataL13<> + 200(SB)/8, $0.500000000000004237e+00
    34  DATA ·erfrodataL13<> + 208(SB)/8, $1.0
    35  DATA ·erfrodataL13<> + 216(SB)/8, $0.416666664838056960e-01
    36  DATA ·erfrodataL13<> + 224(SB)/8, $0.166666666630345592e+00
    37  DATA ·erfrodataL13<> + 232(SB)/8, $0.138926439368309441e-02
    38  DATA ·erfrodataL13<> + 240(SB)/8, $0.833349307718286047e-02
    39  DATA ·erfrodataL13<> + 248(SB)/8, $-.693147180559945286e+00	// approx. -ln(2)
    40  DATA ·erfrodataL13<> + 256(SB)/8, $-.144269504088896339e+01	// approx. -1/ln(2)
    41  DATA ·erfrodataL13<> + 264(SB)/8, $281475245147134.9375	// added and then subtracted again on L9; presumably a round-to-integer bias - TODO confirm
    42  DATA ·erfrodataL13<> + 272(SB)/8, $0.358256136398192529E+01	// offsets 272-336: coefficients read on the L6 fall-through path
    43  DATA ·erfrodataL13<> + 280(SB)/8, $-.554084396500738270E+00
    44  DATA ·erfrodataL13<> + 288(SB)/8, $0.203630123025312046E+02
    45  DATA ·erfrodataL13<> + 296(SB)/8, $-.735750304705934424E+01
    46  DATA ·erfrodataL13<> + 304(SB)/8, $0.250491598091071797E+02
    47  DATA ·erfrodataL13<> + 312(SB)/8, $-.118955882760959931E+02
    48  DATA ·erfrodataL13<> + 320(SB)/8, $0.942903335085524187E+01	// read via the raw-encoded ADB (displacement 0x140) on L6
    49  DATA ·erfrodataL13<> + 328(SB)/8, $-.564189522219085689E+00
    50  DATA ·erfrodataL13<> + 336(SB)/8, $-.503767199403555540E+01
    51  DATA ·erfrodataL13<> + 344(SB)/8, $0xbbc79ca10c924223	// raw IEEE-754 bit pattern of a very small negative double; used in the multiply-add on the L2 path
    52  DATA ·erfrodataL13<> + 352(SB)/8, $0.004099975562609307E+01	// offsets 352-440: polynomial coefficients read on the first (fall-through) path
    53  DATA ·erfrodataL13<> + 360(SB)/8, $-.324434353381296556E+00
    54  DATA ·erfrodataL13<> + 368(SB)/8, $0.945204812084476250E-01
    55  DATA ·erfrodataL13<> + 376(SB)/8, $-.221407443830058214E-01
    56  DATA ·erfrodataL13<> + 384(SB)/8, $0.426072376238804349E-02
    57  DATA ·erfrodataL13<> + 392(SB)/8, $-.692229229127016977E-03
    58  DATA ·erfrodataL13<> + 400(SB)/8, $0.971111253652087188E-04
    59  DATA ·erfrodataL13<> + 408(SB)/8, $-.119752226272050504E-04
    60  DATA ·erfrodataL13<> + 416(SB)/8, $0.131662993588532278E-05
    61  DATA ·erfrodataL13<> + 424(SB)/8, $0.115776482315851236E-07
    62  DATA ·erfrodataL13<> + 432(SB)/8, $-.780118522218151687E-09
    63  DATA ·erfrodataL13<> + 440(SB)/8, $-.130465975877241088E-06
    64  DATA ·erfrodataL13<> + 448(SB)/8, $-0.25	// added to x*x at the start of the first (fall-through) path
    65  GLOBL ·erfrodataL13<> + 0(SB), RODATA, $456	// 57 entries * 8 bytes, read-only
    66  
    67  // Table of log correction terms: 16 doubles, indexed on the L9 path of erfAsm by the low 4 bits of R1 scaled by 8.
    68  DATA ·erftab2066<> + 0(SB)/8, $0.442737824274138381e-01
    69  DATA ·erftab2066<> + 8(SB)/8, $0.263602189790660309e-01
    70  DATA ·erftab2066<> + 16(SB)/8, $0.122565642281703586e-01
    71  DATA ·erftab2066<> + 24(SB)/8, $0.143757052860721398e-02
    72  DATA ·erftab2066<> + 32(SB)/8, $-.651375034121276075e-02
    73  DATA ·erftab2066<> + 40(SB)/8, $-.119317678849450159e-01
    74  DATA ·erftab2066<> + 48(SB)/8, $-.150868749549871069e-01
    75  DATA ·erftab2066<> + 56(SB)/8, $-.161992609578469234e-01
    76  DATA ·erftab2066<> + 64(SB)/8, $-.154492360403337917e-01
    77  DATA ·erftab2066<> + 72(SB)/8, $-.129850717389178721e-01
    78  DATA ·erftab2066<> + 80(SB)/8, $-.892902649276657891e-02
    79  DATA ·erftab2066<> + 88(SB)/8, $-.338202636596794887e-02
    80  DATA ·erftab2066<> + 96(SB)/8, $0.357266307045684762e-02
    81  DATA ·erftab2066<> + 104(SB)/8, $0.118665304327406698e-01
    82  DATA ·erftab2066<> + 112(SB)/8, $0.214434994118118914e-01
    83  DATA ·erftab2066<> + 120(SB)/8, $0.322580645161290314e-01
    84  GLOBL ·erftab2066<> + 0(SB), RODATA, $128	// 16 entries * 8 bytes, read-only
    85  
    86  // Table of +/- 1.0, indexed in erfAsm by the sign bit of x (0 or 1) scaled by 8.
    87  DATA ·erftab12067<> + 0(SB)/8, $1.0	// selected when the sign bit of x is clear
    88  DATA ·erftab12067<> + 8(SB)/8, $-1.0	// selected when the sign bit of x is set
    89  GLOBL ·erftab12067<> + 0(SB), RODATA, $16	// 2 entries * 8 bytes, read-only
    90  
    91  // Erf returns the error function of the argument.
    92  //
    93  // Special cases are:
    94  //      Erf(+Inf) = 1
    95  //      Erf(-Inf) = -1
    96  //      Erf(NaN) = NaN
    97  // The algorithm used is minimax polynomial approximation
    98  // with coefficients determined with a Remez exchange algorithm.
    99  
   100  TEXT	·erfAsm(SB), NOSPLIT, $0-16	// no local frame; 8-byte argument x at 0(FP), 8-byte result at 8(FP)
   101  	FMOVD	x+0(FP), F0	// F0 = x
   102  	MOVD	$·erfrodataL13<>+0(SB), R5	// R5 = base address of the constant pool
   103  	LGDR	F0, R1	// R1 = raw bit pattern of x
   104  	FMOVD	F0, F6	// F6 = x, kept intact for the final steps and the NaN check
   105  	SRAD	$48, R1	// R1 = top 16 bits of x (sign, exponent, 4 fraction bits), sign-extended
   106  	MOVH	$16383, R3	// 0x3FFF
   107  	RISBGZ	$49, $63, $0, R1, R2	// R2 = low 15 bits of R1, i.e. the top bits of |x| without the sign
   108  	MOVW	R2, R6
   109  	MOVW	R3, R7
   110  	CMPBGT	R6, R7, L2	// R2 > 0x3FFF: |x| >= 2, Inf or NaN
   111  	MOVH	$12287, R1	// 0x2FFF
   112  	MOVW	R1, R7
   113  	CMPBLE	R6, R7 ,L12	// R2 <= 0x2FFF: |x| < 2**-255, handled by the linear formula at L12
   114  	MOVH	$16367, R1	// 0x3FEF
   115  	MOVW	R1, R7
   116  	CMPBGT	R6, R7, L5	// R2 > 0x3FEF: 1 <= |x| < 2
   117  	FMOVD	448(R5), F4	// fall through: 2**-255 <= |x| < 1; F4 = -0.25
   118  	FMADD	F0, F0, F4	// F4 = x*x - 0.25, call it t
   119  	FMOVD	440(R5), F3
   120  	WFMDB	V4, V4, V2	// F2 = t*t
   121  	FMOVD	432(R5), F0	// F0 and F1 start two interleaved multiply-add chains in t*t
   122  	FMOVD	424(R5), F1
   123  	WFMADB	V2, V0, V3, V0	// F0 = F2*F0 + F3 (last operand is the destination)
   124  	FMOVD	416(R5), F3
   125  	WFMADB	V2, V1, V3, V1	// F1 = F2*F1 + F3
   126  	FMOVD	408(R5), F5
   127  	FMOVD	400(R5), F3
   128  	WFMADB	V2, V0, V5, V0
   129  	WFMADB	V2, V1, V3, V1
   130  	FMOVD	392(R5), F5
   131  	FMOVD	384(R5), F3
   132  	WFMADB	V2, V0, V5, V0
   133  	WFMADB	V2, V1, V3, V1
   134  	FMOVD	376(R5), F5
   135  	FMOVD	368(R5), F3
   136  	WFMADB	V2, V0, V5, V0
   137  	WFMADB	V2, V1, V3, V1
   138  	FMOVD	360(R5), F5
   139  	FMOVD	352(R5), F3
   140  	WFMADB	V2, V0, V5, V0
   141  	WFMADB	V2, V1, V3, V2	// last step of the F1 chain; result lands in F2
   142  	WFMADB	V4, V0, V2, V0	// F0 = t*F0 + F2: merge the two chains
   143  	WFMADB	V6, V0, V6, V0	// F0 = x*F0 + x
   144  L1:
   145  	FMOVD	F0, ret+8(FP)	// store the result held in F0
   146  	RET
   147  L2:
   148  	MOVH	R1, R1	// |x| >= 2, Inf or NaN: sign-extend the low halfword of R1
   149  	MOVH	$16407, R3	// 0x4017
   150  	SRW	$31, R1, R1	// R1 = 1 if the sign bit of x is set, else 0
   151  	MOVW	R2, R6
   152  	MOVW	R3, R7
   153  	CMPBLE	R6, R7, L6	// R2 <= 0x4017: 2 <= |x| < 6
   154  	MOVW	R1, R1	// |x| >= 6, Inf or NaN: the result is +/-1.0 unless x is NaN
   155  	SLD	$3, R1, R1	// R1 = byte offset 0 or 8 into erftab12067
   156  	MOVD	$·erftab12067<>+0(SB), R3
   157  	WORD    $0x68013000     //ld %f0,0(%r1,%r3) - raw-encoded load: F0 = +1.0 or -1.0 according to the sign of x
   158  	MOVH	$32751, R1	// 0x7FEF
   159  	MOVW	R1, R7
   160  	CMPBGT	R6, R7, L7	// R2 > 0x7FEF: exponent all ones (Inf or NaN), skip the multiply-add
   161  	FMOVD	344(R5), F2	// F2 = the very small negative constant stored as a bit pattern
   162  	FMADD	F2, F0, F0	// F0 = F2*F0 + F0; presumably done for its rounding/inexact side effect - TODO confirm
   163  L7:
   164  	WFCEDBS	V6, V6, V2	// compare x with itself, setting the condition code
   165  	BEQ	L1	// x == x: return the +/-1.0 in F0
   166  	FMOVD	F6, F0	// otherwise x is NaN: return x itself
   167  	FMOVD	F0, ret+8(FP)
   168  	RET
   169  
   170  L6:
   171  	MOVW	R1, R1	// 2 <= |x| < 6; R1 still holds the sign bit (0 or 1)
   172  	SLD	$3, R1, R1	// R1 = byte offset 0 or 8 into erftab12067
   173  	MOVD	$·erftab12067<>+0(SB), R4
   174  	WFMDB	V0, V0, V1	// F1 = x*x (F0 still holds x at this point)
   175  	MOVH	$0x0, R3	// R3 = 0; RISBGN on the L9 path fills in its top 16 bits
   176  	WORD    $0x68014000     //ld %f0,0(%r1,%r4) - raw-encoded load: F0 = +1.0 or -1.0 according to the sign of x
   177  	MOVH	$16399, R1	// 0x400F
   178  	MOVW	R2, R6
   179  	MOVW	R1, R7
   180  	CMPBGT	R6, R7, L8	// R2 > 0x400F: 4 <= |x| < 6
   181  	FMOVD	336(R5), F3	// fall through: 2 <= |x| < 4
   182  	FMOVD	328(R5), F2	// F2 starts one multiply-add chain in x*x
   183  	FMOVD	F1, F4	// F4 = x*x starts the second chain
   184  	WFMADB	V1, V2, V3, V2
   185  	WORD	$0xED405140	//adb %f4,.L30-.L13(%r5) - raw-encoded add: F4 += 320(R5)
   186  	BYTE	$0x00
   187  	BYTE	$0x1A
   188  	FMOVD	312(R5), F3
   189  	WFMADB	V1, V2, V3, V2
   190  	FMOVD	304(R5), F3
   191  	WFMADB	V1, V4, V3, V4
   192  	FMOVD	296(R5), F3
   193  	WFMADB	V1, V2, V3, V2
   194  	FMOVD	288(R5), F3
   195  	WFMADB	V1, V4, V3, V4
   196  	FMOVD	280(R5), F3
   197  	WFMADB	V1, V2, V3, V2
   198  	FMOVD	272(R5), F3
   199  	WFMADB	V1, V4, V3, V4
   200  L9:
   201  	FMOVD	264(R5), F3	// shared tail for 2 <= |x| < 6; on entry F1 = x*x, F2 and F4 hold the two chain results, F0 = +/-1.0
   202  	FMUL	F4, F6	// F6 = x * F4
   203  	FMOVD	256(R5), F4
   204  	WFMADB	V1, V4, V3, V4	// F4 = x*x * 256(R5) + 264(R5)
   205  	FDIV	F6, F2	// F2 = F2 / (x * F4-chain): ratio of the two chains, divided by x
   206  	LGDR	F4, R1	// R1 = raw bits of the biased sum; its low bits are used as integer fields below
   207  	FSUB	F3, F4	// F4 -= 264(R5), removing the bias again
   208  	FMOVD	248(R5), F6
   209  	WFMSDB	V4, V6, V1, V4	// F4 = F4*248(R5) - x*x; looks like the reduced argument of an exp(-x*x) evaluation - TODO confirm
   210  	FMOVD	240(R5), F1
   211  	FMOVD	232(R5), F6
   212  	WFMADB	V4, V6, V1, V6	// polynomial in F4 using the constants at 200-240(R5)
   213  	FMOVD	224(R5), F1
   214  	FMOVD	216(R5), F3
   215  	WFMADB	V4, V3, V1, V3
   216  	WFMDB	V4, V4, V1	// F1 = F4*F4
   217  	FMOVD	208(R5), F5
   218  	WFMADB	V6, V1, V3, V6
   219  	FMOVD	200(R5), F3
   220  	MOVH	R1,R1	// sign-extend the low halfword of R1
   221  	WFMADB	V4, V3, V5, V3
   222  	RISBGZ	$57, $60, $3, R1, R2	// R2 = (low 4 bits of R1) * 8: byte offset into erftab2066
   223  	WFMADB	V1, V6, V3, V6
   224  	RISBGN	$0, $15, $48, R1, R3	// copy the low 16 bits of R1 into the top 16 bits of R3, leaving the rest of R3 (zero) unchanged
   225  	MOVD	$·erftab2066<>+0(SB), R1
   226  	FMOVD	192(R5), F1	// F1 = 1.0
   227  	LDGR	R3, F3	// F3 = the double whose bit pattern is R3
   228  	WORD	$0xED221000	//madb %f2,%f2,0(%r2,%r1) - raw-encoded multiply-add: F2 = F2*erftab2066[R2] + F2
   229  	BYTE	$0x20
   230  	BYTE	$0x1E
   231  	WFMADB	V4, V6, V1, V4	// F4 = F4*F6 + 1.0
   232  	FMUL	F3, F2	// F2 = F2 * F3
   233  	FMADD	F4, F2, F0	// F0 = F4*F2 + F0, where F0 was +/-1.0
   234  	FMOVD	F0, ret+8(FP)
   235  	RET
   236  L12:
   237  	FMOVD	184(R5), F0	// |x| < 2**-255: F0 = 0.12837916709551258632
   238  	WFMADB	V6, V0, V6, V0	// F0 = x*F0 + x
   239  	FMOVD	F0, ret+8(FP)
   240  	RET
   241  L5:
   242  	FMOVD	176(R5), F1	// 1 <= |x| < 2: F1 = -2.25
   243  	FMADD	F0, F0, F1	// F1 = x*x - 2.25, call it u
   244  	FMOVD	168(R5), F3
   245  	WFMDB	V1, V1, V2	// F2 = u*u
   246  	FMOVD	160(R5), F0	// F0 and F4 start two interleaved multiply-add chains in u*u
   247  	FMOVD	152(R5), F4
   248  	WFMADB	V2, V0, V3, V0
   249  	FMOVD	144(R5), F3
   250  	WFMADB	V2, V4, V3, V4
   251  	FMOVD	136(R5), F5
   252  	FMOVD	128(R5), F3
   253  	WFMADB	V2, V0, V5, V0
   254  	WFMADB	V2, V4, V3, V4
   255  	FMOVD	120(R5), F5
   256  	FMOVD	112(R5), F3
   257  	WFMADB	V2, V0, V5, V0
   258  	WFMADB	V2, V4, V3, V4
   259  	FMOVD	104(R5), F5
   260  	FMOVD	96(R5), F3
   261  	WFMADB	V2, V0, V5, V0
   262  	WFMADB	V2, V4, V3, V4
   263  	FMOVD	88(R5), F5
   264  	FMOVD	80(R5), F3
   265  	WFMADB	V2, V0, V5, V0
   266  	WFMADB	V2, V4, V3, V4
   267  	FMOVD	72(R5), F5
   268  	FMOVD	64(R5), F3
   269  	WFMADB	V2, V0, V5, V0
   270  	WFMADB	V2, V4, V3, V4
   271  	FMOVD	56(R5), F5
   272  	FMOVD	48(R5), F3
   273  	WFMADB	V2, V0, V5, V0
   274  	WFMADB	V2, V4, V3, V2	// last step of the F4 chain; result lands in F2
   275  	FMOVD	40(R5), F4	// F4 = 0.5
   276  	WFMADB	V1, V0, V2, V0	// F0 = u*F0 + F2: merge the two chains
   277  	FMUL	F6, F0	// F0 = F0 * x
   278  	FMADD	F4, F6, F0	// F0 = 0.5*x + F0
   279  	FMOVD	F0, ret+8(FP)
   280  	RET
   281  L8:
   282  	FMOVD	32(R5), F3	// 4 <= |x| < 6: shorter chains using the constants at 0-32(R5)
   283  	FMOVD	24(R5), F2
   284  	FMOVD	F1, F4	// F4 = x*x
   285  	WFMADB	V1, V2, V3, V2
   286  	WORD	$0xED405010	//adb %f4,.L68-.L13(%r5) - raw-encoded add: F4 += 16(R5)
   287  	BYTE	$0x00
   288  	BYTE	$0x1A
   289  	FMOVD	8(R5), F3
   290  	WFMADB	V1, V2, V3, V2
   291  	FMOVD	·erfrodataL13<>+0(SB), F3	// same as 0(R5), addressed through the symbol
   292  	WFMADB	V1, V4, V3, V4
   293  	BR	L9	// join the shared tail