// Source: github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/math/erf_s390x.s
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// Minimax polynomial coefficients and other constants
//
// The table holds 57 eight-byte entries (456 bytes). ·erfAsm keeps the
// table's address in R5 and reads entries by byte offset; the groups
// below are labelled with the ·erfAsm code path that reads them.

// Offsets 0-32: read on the L8 path (offset 16 via the hand-encoded adb).
DATA ·erfrodataL13<> + 0(SB)/8, $0.243673229298474689E+01
DATA ·erfrodataL13<> + 8(SB)/8, $-.654905018503145600E+00
DATA ·erfrodataL13<> + 16(SB)/8, $0.404669310217538718E+01
DATA ·erfrodataL13<> + 24(SB)/8, $-.564189219162765367E+00
DATA ·erfrodataL13<> + 32(SB)/8, $-.200104300906596851E+01
// Offsets 40-176: read on the L5 path, from 176 downwards.
DATA ·erfrodataL13<> + 40(SB)/8, $0.5
DATA ·erfrodataL13<> + 48(SB)/8, $0.144070097650207154E+00
DATA ·erfrodataL13<> + 56(SB)/8, $-.116697735205906191E+00
DATA ·erfrodataL13<> + 64(SB)/8, $0.256847684882319665E-01
DATA ·erfrodataL13<> + 72(SB)/8, $-.510805169106229148E-02
DATA ·erfrodataL13<> + 80(SB)/8, $0.885258164825590267E-03
DATA ·erfrodataL13<> + 88(SB)/8, $-.133861989591931411E-03
DATA ·erfrodataL13<> + 96(SB)/8, $0.178294867340272534E-04
DATA ·erfrodataL13<> + 104(SB)/8, $-.211436095674019218E-05
DATA ·erfrodataL13<> + 112(SB)/8, $0.225503753499344434E-06
DATA ·erfrodataL13<> + 120(SB)/8, $-.218247939190783624E-07
DATA ·erfrodataL13<> + 128(SB)/8, $0.193179206264594029E-08
DATA ·erfrodataL13<> + 136(SB)/8, $-.157440643541715319E-09
DATA ·erfrodataL13<> + 144(SB)/8, $0.118878583237342616E-10
DATA ·erfrodataL13<> + 152(SB)/8, $0.554289288424588473E-13
DATA ·erfrodataL13<> + 160(SB)/8, $-.277649758489502214E-14
DATA ·erfrodataL13<> + 168(SB)/8, $-.839318416990049443E-12
DATA ·erfrodataL13<> + 176(SB)/8, $-2.25
// Offset 184: read on the L12 path.
// NOTE(review): the value is numerically close to 2/sqrt(pi) - 1 — confirm.
DATA ·erfrodataL13<> + 184(SB)/8, $.12837916709551258632
// Offsets 192-264: read on the L9 path, from 264 downwards.
// NOTE(review): the entries at 248 and 256 are numerically close to
// -ln(2) and -1/ln(2) respectively — confirm intent.
DATA ·erfrodataL13<> + 192(SB)/8, $1.0
DATA ·erfrodataL13<> + 200(SB)/8, $0.500000000000004237e+00
DATA ·erfrodataL13<> + 208(SB)/8, $1.0
DATA ·erfrodataL13<> + 216(SB)/8, $0.416666664838056960e-01
DATA ·erfrodataL13<> + 224(SB)/8, $0.166666666630345592e+00
DATA ·erfrodataL13<> + 232(SB)/8, $0.138926439368309441e-02
DATA ·erfrodataL13<> + 240(SB)/8, $0.833349307718286047e-02
DATA ·erfrodataL13<> + 248(SB)/8, $-.693147180559945286e+00
DATA ·erfrodataL13<> + 256(SB)/8, $-.144269504088896339e+01
DATA ·erfrodataL13<> + 264(SB)/8, $281475245147134.9375
// Offsets 272-336: read on the L6 fall-through path (offset 320 via the
// hand-encoded adb).
DATA ·erfrodataL13<> + 272(SB)/8, $0.358256136398192529E+01
DATA ·erfrodataL13<> + 280(SB)/8, $-.554084396500738270E+00
DATA ·erfrodataL13<> + 288(SB)/8, $0.203630123025312046E+02
DATA ·erfrodataL13<> + 296(SB)/8, $-.735750304705934424E+01
DATA ·erfrodataL13<> + 304(SB)/8, $0.250491598091071797E+02
DATA ·erfrodataL13<> + 312(SB)/8, $-.118955882760959931E+02
DATA ·erfrodataL13<> + 320(SB)/8, $0.942903335085524187E+01
DATA ·erfrodataL13<> + 328(SB)/8, $-.564189522219085689E+00
DATA ·erfrodataL13<> + 336(SB)/8, $-.503767199403555540E+01
// Offset 344: read on the L2 fall-through path. Given as a raw 64-bit
// pattern rather than a decimal literal.
// NOTE(review): read as an IEEE-754 double this pattern would be a very
// small negative number — confirm.
DATA ·erfrodataL13<> + 344(SB)/8, $0xbbc79ca10c924223
// Offsets 352-448: read on the main fall-through path of ·erfAsm, from
// 448 downwards.
DATA ·erfrodataL13<> + 352(SB)/8, $0.004099975562609307E+01
DATA ·erfrodataL13<> + 360(SB)/8, $-.324434353381296556E+00
DATA ·erfrodataL13<> + 368(SB)/8, $0.945204812084476250E-01
DATA ·erfrodataL13<> + 376(SB)/8, $-.221407443830058214E-01
DATA ·erfrodataL13<> + 384(SB)/8, $0.426072376238804349E-02
DATA ·erfrodataL13<> + 392(SB)/8, $-.692229229127016977E-03
DATA ·erfrodataL13<> + 400(SB)/8, $0.971111253652087188E-04
DATA ·erfrodataL13<> + 408(SB)/8, $-.119752226272050504E-04
DATA ·erfrodataL13<> + 416(SB)/8, $0.131662993588532278E-05
DATA ·erfrodataL13<> + 424(SB)/8, $0.115776482315851236E-07
DATA ·erfrodataL13<> + 432(SB)/8, $-.780118522218151687E-09
DATA ·erfrodataL13<> + 440(SB)/8, $-.130465975877241088E-06
DATA ·erfrodataL13<> + 448(SB)/8, $-0.25
GLOBL ·erfrodataL13<> + 0(SB), RODATA, $456

// Table of log correction terms
//
// Sixteen eight-byte entries (128 bytes). ·erfAsm reads one entry on the
// L9 path through the hand-encoded madb, using R2 as the index register.
DATA ·erftab2066<> + 0(SB)/8, $0.442737824274138381e-01
DATA ·erftab2066<> + 8(SB)/8, $0.263602189790660309e-01
DATA ·erftab2066<> + 16(SB)/8, $0.122565642281703586e-01
DATA ·erftab2066<> + 24(SB)/8, $0.143757052860721398e-02
DATA ·erftab2066<> + 32(SB)/8, $-.651375034121276075e-02
DATA ·erftab2066<> + 40(SB)/8, $-.119317678849450159e-01
DATA ·erftab2066<> + 48(SB)/8, $-.150868749549871069e-01
DATA ·erftab2066<> + 56(SB)/8, $-.161992609578469234e-01
DATA ·erftab2066<> + 64(SB)/8, $-.154492360403337917e-01
DATA ·erftab2066<> + 72(SB)/8, $-.129850717389178721e-01
DATA ·erftab2066<> + 80(SB)/8, $-.892902649276657891e-02
DATA ·erftab2066<> + 88(SB)/8, $-.338202636596794887e-02
DATA ·erftab2066<> + 96(SB)/8, $0.357266307045684762e-02
DATA ·erftab2066<> + 104(SB)/8, $0.118665304327406698e-01
DATA ·erftab2066<> + 112(SB)/8, $0.214434994118118914e-01
DATA ·erftab2066<> + 120(SB)/8, $0.322580645161290314e-01
GLOBL ·erftab2066<> + 0(SB), RODATA, $128

// Table of +/- 1.0
//
// Entry 0 is +1.0 and entry 1 is -1.0. ·erfAsm indexes it with a 0/1
// value scaled by 8 (SLD $3) on the L2 and L6 paths.
DATA ·erftab12067<> + 0(SB)/8, $1.0
DATA ·erftab12067<> + 8(SB)/8, $-1.0
GLOBL ·erftab12067<> + 0(SB), RODATA, $16

// Erf returns the error function of the argument.
//
// Special cases are:
//      Erf(+Inf) = 1
//      Erf(-Inf) = -1
//      Erf(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
//
// erfAsm takes one 8-byte argument x at x+0(FP) and stores an 8-byte
// result at ret+8(FP); it uses no stack frame ($0-16) and is NOSPLIT.
//
// Register roles visible in the code below:
//   R5  address of ·erfrodataL13<> (constant table base)
//   F6  copy of the original argument x
//   R2  15-bit field extracted from the high bits of x by the first risbg;
//       every range test below compares against it
//   F0  running result, stored to ret on every exit
//
// NOTE(review): the comments below assume the Go s390x operand order in
// which the last operand is the destination (WFMADB a, b, c, d meaning
// d = a*b + c; FMADD a, b, d meaning d = a*b + d) — confirm against the
// assembler documentation.
// NOTE(review): the magnitude ranges quoted for each threshold assume R2
// holds the biased exponent plus the top four fraction bits of |x| —
// confirm against the risbg encoding.
TEXT	·erfAsm(SB), NOSPLIT, $0-16
	FMOVD	x+0(FP), F0
	MOVD	$·erfrodataL13<>+0(SB), R5
	// Copy the raw bits of x into R1 and keep x itself in F6.
	WORD	$0xB3CD0010	//lgdr %r1, %f0
	FMOVD	F0, F6
	// Bring the top 16 bits of x down to the low half of R1.
	SRAD	$48, R1
	MOVH	$16383, R3
	WORD	$0xEC2131BF	//risbg %r2,%r1,49,128+63,0
	BYTE	$0x00
	BYTE	$0x55
	MOVW	R2, R6
	MOVW	R3, R7
	// 16383 = 0x3FFF: above this, presumably |x| >= 2.
	CMPBGT	R6, R7, L2
	MOVH	$12287, R1
	MOVW	R1, R7
	// 12287 = 0x2FFF: at or below this, presumably |x| is tiny.
	CMPBLE	R6, R7 ,L12
	MOVH	$16367, R1
	MOVW	R1, R7
	// 16367 = 0x3FEF: above this, presumably 1 <= |x| < 2.
	CMPBGT	R6, R7, L5
	// Fall-through path: F4 = x*x + (-0.25), F2 = F4*F4, then two
	// interleaved multiply-add chains in F0 and F1 step through the
	// coefficients at offsets 440 down to 352.
	FMOVD	448(R5), F4
	FMADD	F0, F0, F4
	FMOVD	440(R5), F3
	WFMDB	V4, V4, V2
	FMOVD	432(R5), F0
	FMOVD	424(R5), F1
	WFMADB	V2, V0, V3, V0
	FMOVD	416(R5), F3
	WFMADB	V2, V1, V3, V1
	FMOVD	408(R5), F5
	FMOVD	400(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V1, V3, V1
	FMOVD	392(R5), F5
	FMOVD	384(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V1, V3, V1
	FMOVD	376(R5), F5
	FMOVD	368(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V1, V3, V1
	FMOVD	360(R5), F5
	FMOVD	352(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V1, V3, V2
	// Merge the two chains, then form x*F0 + x.
	WFMADB	V4, V0, V2, V0
	WFMADB	V6, V0, V6, V0
L1:
	// Common exit: store F0 as the result.
	FMOVD	F0, ret+8(FP)
	RET
L2:
	// Reached when R2 > 0x3FFF. R1 is reduced to a single bit taken from
	// bit 31 of the sign-extended halfword, presumably the sign of x.
	MOVH	R1, R1
	MOVH	$16407, R3
	SRW	$31, R1, R1
	MOVW	R2, R6
	MOVW	R3, R7
	// 16407 = 0x4017: at or below this, presumably |x| < 6.
	CMPBLE	R6, R7, L6
	// Load +1.0 or -1.0 from ·erftab12067<> using R1*8 as the index.
	MOVW	R1, R1
	SLD	$3, R1, R1
	MOVD	$·erftab12067<>+0(SB), R3
	WORD	$0x68013000	//ld %f0,0(%r1,%r3)
	MOVH	$32751, R1
	MOVW	R1, R7
	// 32751 = 0x7FEF: above this, presumably x is Inf or NaN, so the
	// adjustment below is skipped.
	CMPBGT	R6, R7, L7
	// F0 = F2*F0 + F0 with the constant at offset 344.
	// NOTE(review): purpose of this adjustment is not evident from the
	// code (possibly to raise an inexact condition) — confirm.
	FMOVD	344(R5), F2
	FMADD	F2, F0, F0
L7:
	// Compare x with itself; if equal, return F0 via L1, otherwise
	// return x unchanged (the NaN case per the documented special cases).
	WFCEDBS	V6, V6, V2
	BEQ	L1
	FMOVD	F6, F0
	FMOVD	F0, ret+8(FP)
	RET

L6:
	// Reached from L2 when R2 <= 0x4017. Computes F1 = x*x, then replaces
	// F0 with +1.0 or -1.0 from ·erftab12067<> indexed by R1*8.
	MOVW	R1, R1
	SLD	$3, R1, R1
	MOVD	$·erftab12067<>+0(SB), R4
	WFMDB	V0, V0, V1
	// R3 is cleared here; the risbgn at L9 later inserts bits into R3.
	// NOTE(review): presumably this supplies the zero background for that
	// insertion — confirm.
	MOVH	$0x0, R3
	WORD	$0x68014000	//ld %f0,0(%r1,%r4)
	MOVH	$16399, R1
	MOVW	R2, R6
	MOVW	R1, R7
	// 16399 = 0x400F: above this, presumably 4 <= |x| < 6.
	CMPBGT	R6, R7, L8
	// Two multiply-add chains in F1 (= x*x): F2 from offsets 328/336,
	// 312, 296, 280 and F4 from x*x + 320(R5), 304, 288, 272.
	FMOVD	336(R5), F3
	FMOVD	328(R5), F2
	FMOVD	F1, F4
	WFMADB	V1, V2, V3, V2
	// The encoded displacement is 0x140 = 320, i.e. F4 += 320(R5).
	WORD	$0xED405140	//adb %f4,.L30-.L13(%r5)
	BYTE	$0x00
	BYTE	$0x1A
	FMOVD	312(R5), F3
	WFMADB	V1, V2, V3, V2
	FMOVD	304(R5), F3
	WFMADB	V1, V4, V3, V4
	FMOVD	296(R5), F3
	WFMADB	V1, V2, V3, V2
	FMOVD	288(R5), F3
	WFMADB	V1, V4, V3, V4
	FMOVD	280(R5), F3
	WFMADB	V1, V2, V3, V2
	FMOVD	272(R5), F3
	WFMADB	V1, V4, V3, V4
L9:
	// Shared tail of the L6 and L8 paths. On entry F1 = x*x, F2 and F4
	// hold the two chain results, F6 = x and F0 = +/-1.0.
	// F6 = x*F4 and F2 = F2/F6 form the quotient of the two chains.
	// NOTE(review): the remaining steps look like a table-assisted
	// evaluation of exp(-x*x) using ·erftab2066<> — confirm.
	FMOVD	264(R5), F3
	FMUL	F4, F6
	FMOVD	256(R5), F4
	WFMADB	V1, V4, V3, V4
	FDIV	F6, F2
	// Capture the raw bits of F4 in R1 before the constant is removed.
	WORD	$0xB3CD0014	//lgdr %r1, %f4
	FSUB	F3, F4
	FMOVD	248(R5), F6
	WFMSDB	V4, V6, V1, V4
	FMOVD	240(R5), F1
	FMOVD	232(R5), F6
	WFMADB	V4, V6, V1, V6
	FMOVD	224(R5), F1
	FMOVD	216(R5), F3
	WFMADB	V4, V3, V1, V3
	WFMDB	V4, V4, V1
	FMOVD	208(R5), F5
	WFMADB	V6, V1, V3, V6
	FMOVD	200(R5), F3
	MOVH	R1,R1
	WFMADB	V4, V3, V5, V3
	// R2 becomes the byte offset into ·erftab2066<> used by the madb.
	WORD	$0xEC2139BC	//risbg %r2,%r1,57,128+60,3
	BYTE	$0x03
	BYTE	$0x55
	WFMADB	V1, V6, V3, V6
	// Insert bits of R1 into the top 16 bits of R3 (rotate by 48).
	WORD	$0xEC31000F	//risbgn %r3,%r1,64-64+0,64-64+0+16-1,64-0-16
	BYTE	$0x30
	BYTE	$0x59
	MOVD	$·erftab2066<>+0(SB), R1
	FMOVD	192(R5), F1
	// Reinterpret R3 as a floating-point value in F3.
	WORD	$0xB3C10033	//ldgr %f3,%r3
	WORD	$0xED221000	//madb %f2,%f2,0(%r2,%r1)
	BYTE	$0x20
	BYTE	$0x1E
	WFMADB	V4, V6, V1, V4
	FMUL	F3, F2
	// Result = F4*F2 + (+/-1.0).
	FMADD	F4, F2, F0
	FMOVD	F0, ret+8(FP)
	RET
L12:
	// Reached when R2 <= 0x2FFF: result = x*c + x with c = 184(R5).
	FMOVD	184(R5), F0
	WFMADB	V6, V0, V6, V0
	FMOVD	F0, ret+8(FP)
	RET
L5:
	// Reached when 0x3FEF < R2 <= 0x3FFF. F1 = x*x + (-2.25), F2 = F1*F1,
	// then two interleaved multiply-add chains in F0 and F4 step through
	// the coefficients at offsets 168 down to 48.
	FMOVD	176(R5), F1
	FMADD	F0, F0, F1
	FMOVD	168(R5), F3
	WFMDB	V1, V1, V2
	FMOVD	160(R5), F0
	FMOVD	152(R5), F4
	WFMADB	V2, V0, V3, V0
	FMOVD	144(R5), F3
	WFMADB	V2, V4, V3, V4
	FMOVD	136(R5), F5
	FMOVD	128(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V4
	FMOVD	120(R5), F5
	FMOVD	112(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V4
	FMOVD	104(R5), F5
	FMOVD	96(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V4
	FMOVD	88(R5), F5
	FMOVD	80(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V4
	FMOVD	72(R5), F5
	FMOVD	64(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V4
	FMOVD	56(R5), F5
	FMOVD	48(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V2
	// Merge the chains, multiply by x, then add 0.5*x (40(R5) is 0.5).
	FMOVD	40(R5), F4
	WFMADB	V1, V0, V2, V0
	FMUL	F6, F0
	FMADD	F4, F6, F0
	FMOVD	F0, ret+8(FP)
	RET
L8:
	// Reached from L6 when R2 > 0x400F. Shorter versions of the two L6
	// chains, using offsets 0-32, before joining the shared tail at L9.
	FMOVD	32(R5), F3
	FMOVD	24(R5), F2
	FMOVD	F1, F4
	WFMADB	V1, V2, V3, V2
	// The encoded displacement is 0x010 = 16, i.e. F4 += 16(R5).
	WORD	$0xED405010	//adb %f4,.L68-.L13(%r5)
	BYTE	$0x00
	BYTE	$0x1A
	FMOVD	8(R5), F3
	WFMADB	V1, V2, V3, V2
	// Offset 0 of the table, addressed by symbol rather than through R5.
	FMOVD	·erfrodataL13<>+0(SB), F3
	WFMADB	V1, V4, V3, V4
	BR	L9