github.com/likebike/go--@v0.0.0-20190911215757-0bd925d16e96/go/src/math/erf_s390x.s (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // Minimax polynomial coefficients and other constants
        // erfAsm loads the address of this pool into R5 and reads entries by byte offset.
     8  DATA ·erfrodataL13<> + 0(SB)/8, $0.243673229298474689E+01	// offsets 0..32: read only on the L8 path of erfAsm
     9  DATA ·erfrodataL13<> + 8(SB)/8, $-.654905018503145600E+00
    10  DATA ·erfrodataL13<> + 16(SB)/8, $0.404669310217538718E+01	// added to x*x on the L8 path
    11  DATA ·erfrodataL13<> + 24(SB)/8, $-.564189219162765367E+00
    12  DATA ·erfrodataL13<> + 32(SB)/8, $-.200104300906596851E+01
    13  DATA ·erfrodataL13<> + 40(SB)/8, $0.5	// L5 path: final step is F0 = 0.5*x + F0
    14  DATA ·erfrodataL13<> + 48(SB)/8, $0.144070097650207154E+00	// offsets 48..168: polynomial coefficients read on the L5 path
    15  DATA ·erfrodataL13<> + 56(SB)/8, $-.116697735205906191E+00
    16  DATA ·erfrodataL13<> + 64(SB)/8, $0.256847684882319665E-01
    17  DATA ·erfrodataL13<> + 72(SB)/8, $-.510805169106229148E-02
    18  DATA ·erfrodataL13<> + 80(SB)/8, $0.885258164825590267E-03
    19  DATA ·erfrodataL13<> + 88(SB)/8, $-.133861989591931411E-03
    20  DATA ·erfrodataL13<> + 96(SB)/8, $0.178294867340272534E-04
    21  DATA ·erfrodataL13<> + 104(SB)/8, $-.211436095674019218E-05
    22  DATA ·erfrodataL13<> + 112(SB)/8, $0.225503753499344434E-06
    23  DATA ·erfrodataL13<> + 120(SB)/8, $-.218247939190783624E-07
    24  DATA ·erfrodataL13<> + 128(SB)/8, $0.193179206264594029E-08
    25  DATA ·erfrodataL13<> + 136(SB)/8, $-.157440643541715319E-09
    26  DATA ·erfrodataL13<> + 144(SB)/8, $0.118878583237342616E-10
    27  DATA ·erfrodataL13<> + 152(SB)/8, $0.554289288424588473E-13
    28  DATA ·erfrodataL13<> + 160(SB)/8, $-.277649758489502214E-14
    29  DATA ·erfrodataL13<> + 168(SB)/8, $-.839318416990049443E-12
    30  DATA ·erfrodataL13<> + 176(SB)/8, $-2.25	// L5 path: polynomial variable is x*x - 2.25
    31  DATA ·erfrodataL13<> + 184(SB)/8, $.12837916709551258632	// L12 path: result is x + x*this (value is approximately 2/sqrt(pi) - 1)
    32  DATA ·erfrodataL13<> + 192(SB)/8, $1.0	// offsets 192..264: read in the shared tail L9
    33  DATA ·erfrodataL13<> + 200(SB)/8, $0.500000000000004237e+00
    34  DATA ·erfrodataL13<> + 208(SB)/8, $1.0
    35  DATA ·erfrodataL13<> + 216(SB)/8, $0.416666664838056960e-01
    36  DATA ·erfrodataL13<> + 224(SB)/8, $0.166666666630345592e+00
    37  DATA ·erfrodataL13<> + 232(SB)/8, $0.138926439368309441e-02
    38  DATA ·erfrodataL13<> + 240(SB)/8, $0.833349307718286047e-02
    39  DATA ·erfrodataL13<> + 248(SB)/8, $-.693147180559945286e+00	// approximately -ln(2); multiplier in the L9 fused multiply-subtract
    40  DATA ·erfrodataL13<> + 256(SB)/8, $-.144269504088896339e+01	// approximately -log2(e); multiplied by x*x in L9
    41  DATA ·erfrodataL13<> + 264(SB)/8, $281475245147134.9375	// added in L9 and then subtracted again; presumably a round-to-integer bias - confirm
    42  DATA ·erfrodataL13<> + 272(SB)/8, $0.358256136398192529E+01	// offsets 272..336: read on the L6 fall-through path
    43  DATA ·erfrodataL13<> + 280(SB)/8, $-.554084396500738270E+00
    44  DATA ·erfrodataL13<> + 288(SB)/8, $0.203630123025312046E+02
    45  DATA ·erfrodataL13<> + 296(SB)/8, $-.735750304705934424E+01
    46  DATA ·erfrodataL13<> + 304(SB)/8, $0.250491598091071797E+02
    47  DATA ·erfrodataL13<> + 312(SB)/8, $-.118955882760959931E+02
    48  DATA ·erfrodataL13<> + 320(SB)/8, $0.942903335085524187E+01	// added to x*x on the L6 fall-through path
    49  DATA ·erfrodataL13<> + 328(SB)/8, $-.564189522219085689E+00
    50  DATA ·erfrodataL13<> + 336(SB)/8, $-.503767199403555540E+01
    51  DATA ·erfrodataL13<> + 344(SB)/8, $0xbbc79ca10c924223	// stored as a raw 64-bit pattern; read after L2 when the +/-1.0 result is adjusted
    52  DATA ·erfrodataL13<> + 352(SB)/8, $0.004099975562609307E+01	// offsets 352..440: polynomial coefficients for the first (fall-through) path
    53  DATA ·erfrodataL13<> + 360(SB)/8, $-.324434353381296556E+00
    54  DATA ·erfrodataL13<> + 368(SB)/8, $0.945204812084476250E-01
    55  DATA ·erfrodataL13<> + 376(SB)/8, $-.221407443830058214E-01
    56  DATA ·erfrodataL13<> + 384(SB)/8, $0.426072376238804349E-02
    57  DATA ·erfrodataL13<> + 392(SB)/8, $-.692229229127016977E-03
    58  DATA ·erfrodataL13<> + 400(SB)/8, $0.971111253652087188E-04
    59  DATA ·erfrodataL13<> + 408(SB)/8, $-.119752226272050504E-04
    60  DATA ·erfrodataL13<> + 416(SB)/8, $0.131662993588532278E-05
    61  DATA ·erfrodataL13<> + 424(SB)/8, $0.115776482315851236E-07
    62  DATA ·erfrodataL13<> + 432(SB)/8, $-.780118522218151687E-09
    63  DATA ·erfrodataL13<> + 440(SB)/8, $-.130465975877241088E-06
    64  DATA ·erfrodataL13<> + 448(SB)/8, $-0.25	// first path: polynomial variable is x*x - 0.25
    65  GLOBL ·erfrodataL13<> + 0(SB), RODATA, $456	// 57 entries * 8 bytes, read-only, file-local symbol
    66  
    67  // Table of log correction terms
        // Sixteen 8-byte entries. The shared tail L9 of erfAsm indexes this table with
        // (low 4 bits of R1)*8 and applies the entry as F2 = F2*entry + F2.
        // NOTE(review): that use looks like a multiplicative correction for an
        // exponential scale factor rather than a logarithm - confirm the "log" wording.
    68  DATA ·erftab2066<> + 0(SB)/8, $0.442737824274138381e-01
    69  DATA ·erftab2066<> + 8(SB)/8, $0.263602189790660309e-01
    70  DATA ·erftab2066<> + 16(SB)/8, $0.122565642281703586e-01
    71  DATA ·erftab2066<> + 24(SB)/8, $0.143757052860721398e-02
    72  DATA ·erftab2066<> + 32(SB)/8, $-.651375034121276075e-02
    73  DATA ·erftab2066<> + 40(SB)/8, $-.119317678849450159e-01
    74  DATA ·erftab2066<> + 48(SB)/8, $-.150868749549871069e-01
    75  DATA ·erftab2066<> + 56(SB)/8, $-.161992609578469234e-01
    76  DATA ·erftab2066<> + 64(SB)/8, $-.154492360403337917e-01
    77  DATA ·erftab2066<> + 72(SB)/8, $-.129850717389178721e-01
    78  DATA ·erftab2066<> + 80(SB)/8, $-.892902649276657891e-02
    79  DATA ·erftab2066<> + 88(SB)/8, $-.338202636596794887e-02
    80  DATA ·erftab2066<> + 96(SB)/8, $0.357266307045684762e-02
    81  DATA ·erftab2066<> + 104(SB)/8, $0.118665304327406698e-01
    82  DATA ·erftab2066<> + 112(SB)/8, $0.214434994118118914e-01
    83  DATA ·erftab2066<> + 120(SB)/8, $0.322580645161290314e-01
    84  GLOBL ·erftab2066<> + 0(SB), RODATA, $128	// 16 entries * 8 bytes, read-only, file-local symbol
    85  
    86  // Table of +/- 1.0
        // erfAsm indexes this table with (sign bit of x)*8 to load a signed 1.0 into F0.
    87  DATA ·erftab12067<> + 0(SB)/8, $1.0	// index 0: sign bit clear
    88  DATA ·erftab12067<> + 8(SB)/8, $-1.0	// index 8: sign bit set
    89  GLOBL ·erftab12067<> + 0(SB), RODATA, $16	// 2 entries * 8 bytes, read-only, file-local symbol
    90  
    91  // Erf returns the error function of the argument.
    92  //
    93  // Special cases are:
    94  //      Erf(+Inf) = 1
    95  //      Erf(-Inf) = -1
    96  //      Erf(NaN) = NaN
    97  // The algorithm used is minimax polynomial approximation
    98  // with coefficients determined with a Remez exchange algorithm.
        //
        // erfAsm has frame $0-16: one 8-byte argument at x+0(FP) and one 8-byte
        // result at ret+8(FP) (presumably func erfAsm(x float64) float64 - the Go
        // declaration is not in this file).
        //
        // Dispatch is on R2 = top 16 bits of x with the sign bit cleared:
        //      R2 <= 0x2FFF            L12: x + x*184(R5)
        //      0x2FFF < R2 <= 0x3FEF   fall-through: x + x*P(x*x - 0.25)         (|x| < 1)
        //      0x3FEF < R2 <= 0x3FFF   L5:  0.5*x + x*P(x*x - 2.25)              (1 <= |x| < 2)
        //      0x3FFF < R2 <= 0x400F   L6 fall-through, then shared tail L9      (2 <= |x| < 4)
        //      0x400F < R2 <= 0x4017   L6 -> L8, then shared tail L9             (4 <= |x| < 6)
        //      R2 > 0x4017             +/-1.0 from erftab12067, or x itself when x != x
        //
        // Register roles: R5 = base of erfrodataL13, F6 = copy of x (reused as
        // scratch inside L9), F0 = result. Operand order follows the Go assembler:
        // the last operand is the destination, WFMADB a, b, c, d computes d = a*b + c,
        // and FMADD a, b, c computes c = a*b + c.
    99  
   100  TEXT	·erfAsm(SB), NOSPLIT, $0-16
   101  	FMOVD	x+0(FP), F0	// F0 = x
   102  	MOVD	$·erfrodataL13<>+0(SB), R5	// R5 = address of the constant pool
   103  	WORD	$0xB3CD0010	//lgdr %r1, %f0  (R1 = raw bit pattern of x)
   104  	FMOVD	F0, F6	// F6 = x, kept because F0 is overwritten below
   105  	SRAD	$48, R1	// arithmetic shift: low 16 bits of R1 = top 16 bits of x
   106  	MOVH	$16383, R3	// 16383 = 0x3FFF
   107  	WORD	$0xEC2131BF	//risbg	%r2,%r1,49,128+63,0  (R2 = low 15 bits of R1, all other bits zeroed)
   108  	BYTE	$0x00
   109  	BYTE	$0x55
   110  	MOVW	R2, R6
   111  	MOVW	R3, R7
   112  	CMPBGT	R6, R7, L2	// R2 > 0x3FFF: handled at L2
   113  	MOVH	$12287, R1	// 12287 = 0x2FFF
   114  	MOVW	R1, R7
   115  	CMPBLE	R6, R7 ,L12	// R2 <= 0x2FFF: handled at L12
   116  	MOVH	$16367, R1	// 16367 = 0x3FEF
   117  	MOVW	R1, R7
   118  	CMPBGT	R6, R7, L5	// R2 > 0x3FEF: handled at L5
   119  	FMOVD	448(R5), F4	// F4 = -0.25
   120  	FMADD	F0, F0, F4	// F4 = x*x - 0.25
   121  	FMOVD	440(R5), F3
   122  	WFMDB	V4, V4, V2	// V2 = F4*F4; two Horner chains in V2 follow (accumulators V0 and V1)
   123  	FMOVD	432(R5), F0
   124  	FMOVD	424(R5), F1
   125  	WFMADB	V2, V0, V3, V0	// V0 chain uses offsets 432, 440, 408, 392, 376, 360
   126  	FMOVD	416(R5), F3
   127  	WFMADB	V2, V1, V3, V1	// V1 chain uses offsets 424, 416, 400, 384, 368, 352
   128  	FMOVD	408(R5), F5
   129  	FMOVD	400(R5), F3
   130  	WFMADB	V2, V0, V5, V0
   131  	WFMADB	V2, V1, V3, V1
   132  	FMOVD	392(R5), F5
   133  	FMOVD	384(R5), F3
   134  	WFMADB	V2, V0, V5, V0
   135  	WFMADB	V2, V1, V3, V1
   136  	FMOVD	376(R5), F5
   137  	FMOVD	368(R5), F3
   138  	WFMADB	V2, V0, V5, V0
   139  	WFMADB	V2, V1, V3, V1
   140  	FMOVD	360(R5), F5
   141  	FMOVD	352(R5), F3
   142  	WFMADB	V2, V0, V5, V0
   143  	WFMADB	V2, V1, V3, V2	// last step of the V1 chain lands in V2
   144  	WFMADB	V4, V0, V2, V0	// combine the two chains: V0 = F4*V0 + V2
   145  	WFMADB	V6, V0, V6, V0	// result = x*V0 + x
   146  L1:
   147  	FMOVD	F0, ret+8(FP)	// store result
   148  	RET
   149  L2:
   150  	MOVH	R1, R1	// sign-extend the top 16 bits of x held in R1
   151  	MOVH	$16407, R3	// 16407 = 0x4017
   152  	SRW	$31, R1, R1	// R1 = 1 if x has its sign bit set, else 0
   153  	MOVW	R2, R6
   154  	MOVW	R3, R7
   155  	CMPBLE	R6, R7, L6	// R2 <= 0x4017: handled at L6
   156  	MOVW	R1, R1
   157  	SLD	$3, R1, R1	// R1 = byte offset into erftab12067 (0 or 8)
   158  	MOVD	$·erftab12067<>+0(SB), R3
   159  	WORD    $0x68013000     //ld %f0,0(%r1,%r3)  (F0 = +1.0 or -1.0 according to the sign of x)
   160  	MOVH	$32751, R1	// 32751 = 0x7FEF
   161  	MOVW	R1, R7
   162  	CMPBGT	R6, R7, L7	// R2 > 0x7FEF (exponent field all ones, Inf/NaN): skip the adjustment
   163  	FMOVD	344(R5), F2	// F2 = constant stored as raw bits 0xbbc79ca10c924223
   164  	FMADD	F2, F0, F0	// F0 = F2*F0 + F0; presumably perturbs +/-1.0 by a tiny amount for rounding/inexact - confirm
   165  L7:
   166  	WFCEDBS	V6, V6, V2	// compare x with itself, setting the condition code
   167  	BEQ	L1	// x == x: return the signed 1.0 in F0
   168  	FMOVD	F6, F0	// x != x: return x itself
   169  	FMOVD	F0, ret+8(FP)
   170  	RET
   171  
   172  L6:
   173  	MOVW	R1, R1
   174  	SLD	$3, R1, R1	// R1 = byte offset into erftab12067 (0 or 8)
   175  	MOVD	$·erftab12067<>+0(SB), R4
   176  	WFMDB	V0, V0, V1	// V1 = x*x (F0 still holds x here)
   177  	MOVH	$0x0, R3	// R3 = 0; its top 16 bits are filled by risbgn in L9
   178  	WORD    $0x68014000     //ld %f0,0(%r1,%r4)  (F0 = +1.0 or -1.0 according to the sign of x)
   179  	MOVH	$16399, R1	// 16399 = 0x400F
   180  	MOVW	R2, R6
   181  	MOVW	R1, R7
   182  	CMPBGT	R6, R7, L8	// R2 > 0x400F: use the L8 coefficient set
   183  	FMOVD	336(R5), F3
   184  	FMOVD	328(R5), F2
   185  	FMOVD	F1, F4	// F4 = x*x
   186  	WFMADB	V1, V2, V3, V2	// V2 chain in x*x uses offsets 328, 336, 312, 296, 280
   187  	WORD	$0xED405140	//adb %f4,.L30-.L13(%r5)  (F4 = x*x + 320(R5))
   188  	BYTE	$0x00
   189  	BYTE	$0x1A
   190  	FMOVD	312(R5), F3
   191  	WFMADB	V1, V2, V3, V2
   192  	FMOVD	304(R5), F3
   193  	WFMADB	V1, V4, V3, V4	// V4 chain in x*x uses offsets 320 (via adb), 304, 288, 272
   194  	FMOVD	296(R5), F3
   195  	WFMADB	V1, V2, V3, V2
   196  	FMOVD	288(R5), F3
   197  	WFMADB	V1, V4, V3, V4
   198  	FMOVD	280(R5), F3
   199  	WFMADB	V1, V2, V3, V2
   200  	FMOVD	272(R5), F3
   201  	WFMADB	V1, V4, V3, V4
   202  L9:	// shared tail; on entry V1 = x*x, V2 and V4 = the two chain results, F6 = x, F0 = +/-1.0, R3 = 0
   203  	FMOVD	264(R5), F3	// F3 = 281475245147134.9375
   204  	FMUL	F4, F6	// F6 = x * V4
   205  	FMOVD	256(R5), F4	// F4 = -.144269504088896339e+01
   206  	WFMADB	V1, V4, V3, V4	// F4 = x*x*F4 + F3
   207  	FDIV	F6, F2	// F2 = V2 / (x * V4)
   208  	WORD	$0xB3CD0014	//lgdr %r1, %f4  (R1 = raw bits of F4 while the 264(R5) constant is still added in)
   209  	FSUB	F3, F4	// F4 = F4 - F3, removing the 264(R5) constant again
   210  	FMOVD	248(R5), F6	// F6 = -.693147180559945286e+00
   211  	WFMSDB	V4, V6, V1, V4	// F4 = F4*F6 - x*x; presumably the reduced argument for exp(-x*x) - confirm
   212  	FMOVD	240(R5), F1
   213  	FMOVD	232(R5), F6
   214  	WFMADB	V4, V6, V1, V6	// polynomial in F4 built from offsets 232, 240, 216, 224, 200, 208, 192
   215  	FMOVD	224(R5), F1
   216  	FMOVD	216(R5), F3
   217  	WFMADB	V4, V3, V1, V3
   218  	WFMDB	V4, V4, V1	// V1 = F4*F4
   219  	FMOVD	208(R5), F5
   220  	WFMADB	V6, V1, V3, V6
   221  	FMOVD	200(R5), F3
   222  	MOVH	R1,R1	// keep only the sign-extended low 16 bits of the captured pattern
   223  	WFMADB	V4, V3, V5, V3
   224  	WORD	$0xEC2139BC	//risbg	%r2,%r1,57,128+60,3  (R2 = (low 4 bits of R1) << 3: byte offset into erftab2066)
   225  	BYTE	$0x03
   226  	BYTE	$0x55
   227  	WFMADB	V1, V6, V3, V6
   228  	WORD	$0xEC31000F	//risbgn %r3,%r1,64-64+0,64-64+0+16-1,64-0-16  (top 16 bits of R3 = low 16 bits of R1)
   229  	BYTE	$0x30
   230  	BYTE	$0x59
   231  	MOVD	$·erftab2066<>+0(SB), R1
   232  	FMOVD	192(R5), F1	// F1 = 1.0
   233  	WORD	$0xB3C10033	//ldgr %f3,%r3  (F3 = R3 reinterpreted as a double; presumably the power-of-two scale - confirm)
   234  	WORD	$0xED221000	//madb %f2,%f2,0(%r2,%r1)  (F2 = F2*erftab2066[R2] + F2)
   235  	BYTE	$0x20
   236  	BYTE	$0x1E
   237  	WFMADB	V4, V6, V1, V4	// F4 = F4*V6 + 1.0
   238  	FMUL	F3, F2	// F2 = F2 * F3
   239  	FMADD	F4, F2, F0	// result = F4*F2 + (+/-1.0)
   240  	FMOVD	F0, ret+8(FP)
   241  	RET
   242  L12:
   243  	FMOVD	184(R5), F0	// F0 = .12837916709551258632
   244  	WFMADB	V6, V0, V6, V0	// result = x*F0 + x
   245  	FMOVD	F0, ret+8(FP)
   246  	RET
   247  L5:
   248  	FMOVD	176(R5), F1	// F1 = -2.25
   249  	FMADD	F0, F0, F1	// F1 = x*x - 2.25
   250  	FMOVD	168(R5), F3
   251  	WFMDB	V1, V1, V2	// V2 = F1*F1; two Horner chains in V2 follow (accumulators V0 and V4)
   252  	FMOVD	160(R5), F0
   253  	FMOVD	152(R5), F4
   254  	WFMADB	V2, V0, V3, V0	// V0 chain uses offsets 160, 168, 136, 120, 104, 88, 72, 56
   255  	FMOVD	144(R5), F3
   256  	WFMADB	V2, V4, V3, V4	// V4 chain uses offsets 152, 144, 128, 112, 96, 80, 64, 48
   257  	FMOVD	136(R5), F5
   258  	FMOVD	128(R5), F3
   259  	WFMADB	V2, V0, V5, V0
   260  	WFMADB	V2, V4, V3, V4
   261  	FMOVD	120(R5), F5
   262  	FMOVD	112(R5), F3
   263  	WFMADB	V2, V0, V5, V0
   264  	WFMADB	V2, V4, V3, V4
   265  	FMOVD	104(R5), F5
   266  	FMOVD	96(R5), F3
   267  	WFMADB	V2, V0, V5, V0
   268  	WFMADB	V2, V4, V3, V4
   269  	FMOVD	88(R5), F5
   270  	FMOVD	80(R5), F3
   271  	WFMADB	V2, V0, V5, V0
   272  	WFMADB	V2, V4, V3, V4
   273  	FMOVD	72(R5), F5
   274  	FMOVD	64(R5), F3
   275  	WFMADB	V2, V0, V5, V0
   276  	WFMADB	V2, V4, V3, V4
   277  	FMOVD	56(R5), F5
   278  	FMOVD	48(R5), F3
   279  	WFMADB	V2, V0, V5, V0
   280  	WFMADB	V2, V4, V3, V2	// last step of the V4 chain lands in V2
   281  	FMOVD	40(R5), F4	// F4 = 0.5
   282  	WFMADB	V1, V0, V2, V0	// combine the two chains: V0 = F1*V0 + V2
   283  	FMUL	F6, F0	// F0 = F0 * x
   284  	FMADD	F4, F6, F0	// result = 0.5*x + F0
   285  	FMOVD	F0, ret+8(FP)
   286  	RET
   287  L8:	// reached from L6 when R2 > 0x400F; V1 = x*x, F0 = +/-1.0
   288  	FMOVD	32(R5), F3
   289  	FMOVD	24(R5), F2
   290  	FMOVD	F1, F4	// F4 = x*x
   291  	WFMADB	V1, V2, V3, V2	// V2 chain in x*x uses offsets 24, 32, 8
   292  	WORD	$0xED405010	//adb %f4,.L68-.L13(%r5)  (F4 = x*x + 16(R5))
   293  	BYTE	$0x00
   294  	BYTE	$0x1A
   295  	FMOVD	8(R5), F3
   296  	WFMADB	V1, V2, V3, V2
   297  	FMOVD	·erfrodataL13<>+0(SB), F3	// pool offset 0; NOTE(review): R5 still holds this base, every other load uses it
   298  	WFMADB	V1, V4, V3, V4	// F4 = x*x*F4 + F3
   299  	BR	L9	// finish in the shared tail