// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// Minimax polynomial coefficients and other constants
//
// ·erfrodataL13 is a 456-byte read-only table of 8-byte entries. erfAsm
// loads its address into R5 and reads entries by byte offset. The groups
// below are labelled with the erfAsm code path that reads them.

// Offsets 0-32: read on the L8 path.
DATA ·erfrodataL13<> + 0(SB)/8, $0.243673229298474689E+01
DATA ·erfrodataL13<> + 8(SB)/8, $-.654905018503145600E+00
DATA ·erfrodataL13<> + 16(SB)/8, $0.404669310217538718E+01
DATA ·erfrodataL13<> + 24(SB)/8, $-.564189219162765367E+00
DATA ·erfrodataL13<> + 32(SB)/8, $-.200104300906596851E+01
// Offsets 40-176: read on the L5 path (176 is the -2.25 added to x*x).
DATA ·erfrodataL13<> + 40(SB)/8, $0.5
DATA ·erfrodataL13<> + 48(SB)/8, $0.144070097650207154E+00
DATA ·erfrodataL13<> + 56(SB)/8, $-.116697735205906191E+00
DATA ·erfrodataL13<> + 64(SB)/8, $0.256847684882319665E-01
DATA ·erfrodataL13<> + 72(SB)/8, $-.510805169106229148E-02
DATA ·erfrodataL13<> + 80(SB)/8, $0.885258164825590267E-03
DATA ·erfrodataL13<> + 88(SB)/8, $-.133861989591931411E-03
DATA ·erfrodataL13<> + 96(SB)/8, $0.178294867340272534E-04
DATA ·erfrodataL13<> + 104(SB)/8, $-.211436095674019218E-05
DATA ·erfrodataL13<> + 112(SB)/8, $0.225503753499344434E-06
DATA ·erfrodataL13<> + 120(SB)/8, $-.218247939190783624E-07
DATA ·erfrodataL13<> + 128(SB)/8, $0.193179206264594029E-08
DATA ·erfrodataL13<> + 136(SB)/8, $-.157440643541715319E-09
DATA ·erfrodataL13<> + 144(SB)/8, $0.118878583237342616E-10
DATA ·erfrodataL13<> + 152(SB)/8, $0.554289288424588473E-13
DATA ·erfrodataL13<> + 160(SB)/8, $-.277649758489502214E-14
DATA ·erfrodataL13<> + 168(SB)/8, $-.839318416990049443E-12
DATA ·erfrodataL13<> + 176(SB)/8, $-2.25
// Offset 184: read on the L12 path (numerically 2/sqrt(pi) - 1).
DATA ·erfrodataL13<> + 184(SB)/8, $.12837916709551258632
// Offsets 192-264: read in the shared L9 tail. 248 and 256 are numerically
// -ln(2) and -1/ln(2); 264 is a large constant that L9 adds and then
// subtracts again.
DATA ·erfrodataL13<> + 192(SB)/8, $1.0
DATA ·erfrodataL13<> + 200(SB)/8, $0.500000000000004237e+00
DATA ·erfrodataL13<> + 208(SB)/8, $1.0
DATA ·erfrodataL13<> + 216(SB)/8, $0.416666664838056960e-01
DATA ·erfrodataL13<> + 224(SB)/8, $0.166666666630345592e+00
DATA ·erfrodataL13<> + 232(SB)/8, $0.138926439368309441e-02
DATA ·erfrodataL13<> + 240(SB)/8, $0.833349307718286047e-02
DATA ·erfrodataL13<> + 248(SB)/8, $-.693147180559945286e+00
DATA ·erfrodataL13<> + 256(SB)/8, $-.144269504088896339e+01
DATA ·erfrodataL13<> + 264(SB)/8, $281475245147134.9375
// Offsets 272-336: read on the L6 fall-through path (before L9).
DATA ·erfrodataL13<> + 272(SB)/8, $0.358256136398192529E+01
DATA ·erfrodataL13<> + 280(SB)/8, $-.554084396500738270E+00
DATA ·erfrodataL13<> + 288(SB)/8, $0.203630123025312046E+02
DATA ·erfrodataL13<> + 296(SB)/8, $-.735750304705934424E+01
DATA ·erfrodataL13<> + 304(SB)/8, $0.250491598091071797E+02
DATA ·erfrodataL13<> + 312(SB)/8, $-.118955882760959931E+02
DATA ·erfrodataL13<> + 320(SB)/8, $0.942903335085524187E+01
DATA ·erfrodataL13<> + 328(SB)/8, $-.564189522219085689E+00
DATA ·erfrodataL13<> + 336(SB)/8, $-.503767199403555540E+01
// Offset 344: raw bit pattern of a very small negative double, read on the
// L2 path.
DATA ·erfrodataL13<> + 344(SB)/8, $0xbbc79ca10c924223
// Offsets 352-448: read on the first (fall-through) path of erfAsm
// (448 is the -0.25 added to x*x).
DATA ·erfrodataL13<> + 352(SB)/8, $0.004099975562609307E+01
DATA ·erfrodataL13<> + 360(SB)/8, $-.324434353381296556E+00
DATA ·erfrodataL13<> + 368(SB)/8, $0.945204812084476250E-01
DATA ·erfrodataL13<> + 376(SB)/8, $-.221407443830058214E-01
DATA ·erfrodataL13<> + 384(SB)/8, $0.426072376238804349E-02
DATA ·erfrodataL13<> + 392(SB)/8, $-.692229229127016977E-03
DATA ·erfrodataL13<> + 400(SB)/8, $0.971111253652087188E-04
DATA ·erfrodataL13<> + 408(SB)/8, $-.119752226272050504E-04
DATA ·erfrodataL13<> + 416(SB)/8, $0.131662993588532278E-05
DATA ·erfrodataL13<> + 424(SB)/8, $0.115776482315851236E-07
DATA ·erfrodataL13<> + 432(SB)/8, $-.780118522218151687E-09
DATA ·erfrodataL13<> + 440(SB)/8, $-.130465975877241088E-06
DATA ·erfrodataL13<> + 448(SB)/8, $-0.25
GLOBL ·erfrodataL13<> + 0(SB), RODATA, $456

// Table of log correction terms
//
// 16 entries of 8 bytes. erfAsm (label L9) indexes it with a 4-bit field
// taken from the integer image of an intermediate value, pre-scaled by 8.
DATA ·erftab2066<> + 0(SB)/8, $0.442737824274138381e-01
DATA ·erftab2066<> + 8(SB)/8, $0.263602189790660309e-01
DATA ·erftab2066<> + 16(SB)/8, $0.122565642281703586e-01
DATA ·erftab2066<> + 24(SB)/8, $0.143757052860721398e-02
DATA ·erftab2066<> + 32(SB)/8, $-.651375034121276075e-02
DATA ·erftab2066<> + 40(SB)/8, $-.119317678849450159e-01
DATA ·erftab2066<> + 48(SB)/8, $-.150868749549871069e-01
DATA ·erftab2066<> + 56(SB)/8, $-.161992609578469234e-01
DATA ·erftab2066<> + 64(SB)/8, $-.154492360403337917e-01
DATA ·erftab2066<> + 72(SB)/8, $-.129850717389178721e-01
DATA ·erftab2066<> + 80(SB)/8, $-.892902649276657891e-02
DATA ·erftab2066<> + 88(SB)/8, $-.338202636596794887e-02
DATA ·erftab2066<> + 96(SB)/8, $0.357266307045684762e-02
DATA ·erftab2066<> + 104(SB)/8, $0.118665304327406698e-01
DATA ·erftab2066<> + 112(SB)/8, $0.214434994118118914e-01
DATA ·erftab2066<> + 120(SB)/8, $0.322580645161290314e-01
GLOBL ·erftab2066<> + 0(SB), RODATA, $128

// Table of +/- 1.0
//
// Entry 0 is +1.0 and entry 1 is -1.0; erfAsm indexes it with the sign bit
// of the argument.
DATA ·erftab12067<> + 0(SB)/8, $1.0
DATA ·erftab12067<> + 8(SB)/8, $-1.0
GLOBL ·erftab12067<> + 0(SB), RODATA, $16

// Erf returns the error function of the argument.
//
// Special cases are:
//      Erf(+Inf) = 1
//      Erf(-Inf) = -1
//      Erf(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.

// erfAsm takes one float64 argument x at x+0(FP) and stores a float64
// result at ret+8(FP) (frame size 0, 16 bytes of arguments).
//
// The top 16 bits of x with the sign cleared are compared against fixed
// thresholds to choose a code path:
//
//	top16 <= 0x2FFF            L12          x + x*c
//	0x2FFF < top16 <= 0x3FEF   fall-through x + x*P((x*x - 0.25))
//	0x3FEF < top16 <= 0x3FFF   L5           0.5*x + x*P((x*x - 2.25))
//	0x3FFF < top16 <= 0x400F   L6 -> L9     rational in x*x, then shared tail
//	0x400F < top16 <= 0x4017   L8 -> L9     rational in x*x, then shared tail
//	top16  > 0x4017            L2/L7        +/-1.0, or x itself if x != x
//
// In IEEE 754 double terms 0x3FF0, 0x4000, 0x4010 and 0x4018 are the top
// halfwords of 1.0, 2.0, 4.0 and 6.0.
//
// Register roles kept for the whole function: R5 = address of
// ·erfrodataL13, F6 = copy of x, R2 = top16 as described above.
// The polynomial paths run two multiply-add chains side by side and merge
// them at the end; the instruction interleaving is deliberate and must not
// be reordered casually.
TEXT	·erfAsm(SB), NOSPLIT, $0-16
	FMOVD	x+0(FP), F0
	MOVD	$·erfrodataL13<>+0(SB), R5
	// R1 = bit image of x; F6 keeps x because F0 is reused as scratch.
	LGDR	F0, R1
	FMOVD	F0, F6
	// R1 = top 16 bits of x (sign-extended); R2 = the same with the sign
	// bit cleared (bits 49-63 selected, everything else zeroed).
	SRAD	$48, R1
	MOVH	$16383, R3
	RISBGZ	$49, $63, $0, R1, R2
	MOVW	R2, R6
	MOVW	R3, R7
	// top16 > 0x3FFF
	CMPBGT	R6, R7, L2
	MOVH	$12287, R1
	MOVW	R1, R7
	// top16 <= 0x2FFF
	CMPBLE	R6, R7 ,L12
	MOVH	$16367, R1
	MOVW	R1, R7
	// top16 > 0x3FEF
	CMPBGT	R6, R7, L5
	// Fall-through path: F4 = x*x - 0.25, V2 = F4*F4. Two chains (V0, V1)
	// are evaluated in V2 with coefficients from offsets 440 down to 352,
	// merged with one step in V4, then the result is x*poly + x.
	FMOVD	448(R5), F4
	FMADD	F0, F0, F4
	FMOVD	440(R5), F3
	WFMDB	V4, V4, V2
	FMOVD	432(R5), F0
	FMOVD	424(R5), F1
	WFMADB	V2, V0, V3, V0
	FMOVD	416(R5), F3
	WFMADB	V2, V1, V3, V1
	FMOVD	408(R5), F5
	FMOVD	400(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V1, V3, V1
	FMOVD	392(R5), F5
	FMOVD	384(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V1, V3, V1
	FMOVD	376(R5), F5
	FMOVD	368(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V1, V3, V1
	FMOVD	360(R5), F5
	FMOVD	352(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V1, V3, V2
	WFMADB	V4, V0, V2, V0
	WFMADB	V6, V0, V6, V0
L1:
	FMOVD	F0, ret+8(FP)
	RET
L2:
	// R1 still holds the sign-extended top halfword of x; reduce it to
	// the sign bit (0 or 1).
	MOVH	R1, R1
	MOVH	$16407, R3
	SRW	$31, R1, R1
	MOVW	R2, R6
	MOVW	R3, R7
	// top16 <= 0x4017
	CMPBLE	R6, R7, L6
	// Large-magnitude path: F0 = erftab12067[sign], i.e. +1.0 or -1.0.
	MOVW	R1, R1
	SLD	$3, R1, R1
	MOVD	$·erftab12067<>+0(SB), R3
	WORD	$0x68013000	//ld %f0,0(%r1,%r3)
	MOVH	$32751, R1
	MOVW	R1, R7
	// top16 > 0x7FEF (exponent field all ones): skip the adjustment below.
	CMPBGT	R6, R7, L7
	// F0 = F0*tiny + F0 with the tiny negative constant at offset 344.
	// NOTE(review): presumably done for its floating-point side effects
	// (inexact flag / directed rounding) - confirm.
	FMOVD	344(R5), F2
	FMADD	F2, F0, F0
L7:
	// x == x: return the +/-1.0 in F0. Otherwise return x itself.
	WFCEDBS	V6, V6, V2
	BEQ	L1
	FMOVD	F6, F0
	FMOVD	F0, ret+8(FP)
	RET

L6:
	// R1 = sign bit from L2. V1 = x*x is computed before F0 is overwritten
	// with erftab12067[sign]. R3 is cleared here and is used as the
	// insertion target by RISBGN in L9.
	MOVW	R1, R1
	SLD	$3, R1, R1
	MOVD	$·erftab12067<>+0(SB), R4
	WFMDB	V0, V0, V1
	MOVH	$0x0, R3
	WORD	$0x68014000	//ld %f0,0(%r1,%r4)
	MOVH	$16399, R1
	MOVW	R2, R6
	MOVW	R1, R7
	// top16 > 0x400F
	CMPBGT	R6, R7, L8
	// Two polynomials in V1 = x*x, built alternately: V2 from offsets
	// 328, 336, 312, 296, 280 and V4 from x*x + [320], 304, 288, 272.
	// L9 later divides V2 by x*V4.
	FMOVD	336(R5), F3
	FMOVD	328(R5), F2
	FMOVD	F1, F4
	WFMADB	V1, V2, V3, V2
	// Hand-encoded ADB: displacement 0x140 = 320 from R5.
	WORD	$0xED405140	//adb %f4,.L30-.L13(%r5)
	BYTE	$0x00
	BYTE	$0x1A
	FMOVD	312(R5), F3
	WFMADB	V1, V2, V3, V2
	FMOVD	304(R5), F3
	WFMADB	V1, V4, V3, V4
	FMOVD	296(R5), F3
	WFMADB	V1, V2, V3, V2
	FMOVD	288(R5), F3
	WFMADB	V1, V4, V3, V4
	FMOVD	280(R5), F3
	WFMADB	V1, V2, V3, V2
	FMOVD	272(R5), F3
	WFMADB	V1, V4, V3, V4
L9:
	// Shared tail for L6 and L8. On entry: F0 = +/-1.0, V1 = x*x,
	// V2 and V4 = the two polynomials, F6 = x, R3 = 0.
	//
	// F6 = x*V4 and F2 = V2/F6 form the rational factor.
	// V4 = x*x*[256] + [264]; its bit image is captured in R1 and the
	// large constant is subtracted again, after which
	// V4 = V4*[248] - x*x is the argument of the polynomial built from
	// offsets 192-240.
	// NOTE(review): this has the shape of an exp(-x*x) evaluation by
	// range reduction with a table lookup - confirm against the
	// algorithm description.
	FMOVD	264(R5), F3
	FMUL	F4, F6
	FMOVD	256(R5), F4
	WFMADB	V1, V4, V3, V4
	FDIV	F6, F2
	LGDR	F4, R1
	FSUB	F3, F4
	FMOVD	248(R5), F6
	WFMSDB	V4, V6, V1, V4
	FMOVD	240(R5), F1
	FMOVD	232(R5), F6
	WFMADB	V4, V6, V1, V6
	FMOVD	224(R5), F1
	FMOVD	216(R5), F3
	WFMADB	V4, V3, V1, V3
	WFMDB	V4, V4, V1
	FMOVD	208(R5), F5
	WFMADB	V6, V1, V3, V6
	FMOVD	200(R5), F3
	MOVH	R1,R1
	WFMADB	V4, V3, V5, V3
	// R2 = 4-bit field of R1 shifted left by 3: byte offset into the
	// 16-entry erftab2066.
	RISBGZ	$57, $60, $3, R1, R2
	WFMADB	V1, V6, V3, V6
	// Rotate R1 left by 48 and insert the result's bits 0-15 into R3; R3 is
	// then moved into F3 and used as a multiplicative scale factor.
	RISBGN	$0, $15, $48, R1, R3
	MOVD	$·erftab2066<>+0(SB), R1
	FMOVD	192(R5), F1
	LDGR	R3, F3
	// Hand-encoded MADB: F2 = F2*erftab2066[R2] + F2.
	WORD	$0xED221000	//madb %f2,%f2,0(%r2,%r1)
	BYTE	$0x20
	BYTE	$0x1E
	WFMADB	V4, V6, V1, V4
	FMUL	F3, F2
	// Result = V4*F2 + (+/-1.0).
	FMADD	F4, F2, F0
	FMOVD	F0, ret+8(FP)
	RET
L12:
	// Smallest-magnitude path: result = x*[184] + x.
	FMOVD	184(R5), F0
	WFMADB	V6, V0, V6, V0
	FMOVD	F0, ret+8(FP)
	RET
L5:
	// F1 = x*x - 2.25, V2 = F1*F1. Two chains (V0, V4) are evaluated in V2
	// with coefficients from offsets 168 down to 48, merged with one step
	// in V1, then the result is x*poly + 0.5*x.
	FMOVD	176(R5), F1
	FMADD	F0, F0, F1
	FMOVD	168(R5), F3
	WFMDB	V1, V1, V2
	FMOVD	160(R5), F0
	FMOVD	152(R5), F4
	WFMADB	V2, V0, V3, V0
	FMOVD	144(R5), F3
	WFMADB	V2, V4, V3, V4
	FMOVD	136(R5), F5
	FMOVD	128(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V4
	FMOVD	120(R5), F5
	FMOVD	112(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V4
	FMOVD	104(R5), F5
	FMOVD	96(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V4
	FMOVD	88(R5), F5
	FMOVD	80(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V4
	FMOVD	72(R5), F5
	FMOVD	64(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V4
	FMOVD	56(R5), F5
	FMOVD	48(R5), F3
	WFMADB	V2, V0, V5, V0
	WFMADB	V2, V4, V3, V2
	FMOVD	40(R5), F4
	WFMADB	V1, V0, V2, V0
	FMUL	F6, F0
	FMADD	F4, F6, F0
	FMOVD	F0, ret+8(FP)
	RET
L8:
	// Same scheme as the L6 fall-through with lower-degree polynomials:
	// V2 from offsets 24, 32, 8 and V4 from x*x + [16], 0. Continues in
	// the shared tail at L9.
	FMOVD	32(R5), F3
	FMOVD	24(R5), F2
	FMOVD	F1, F4
	WFMADB	V1, V2, V3, V2
	// Hand-encoded ADB: displacement 0x010 = 16 from R5.
	WORD	$0xED405010	//adb %f4,.L68-.L13(%r5)
	BYTE	$0x00
	BYTE	$0x1A
	FMOVD	8(R5), F3
	WFMADB	V1, V2, V3, V2
	// Entry at offset 0, addressed through the symbol instead of R5.
	FMOVD	·erfrodataL13<>+0(SB), F3
	WFMADB	V1, V4, V3, V4
	BR	L9