// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !math_big_pure_go,s390x

#include "textflag.h"

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions used throughout this file:
//   - Arguments and results live on the caller's frame and are addressed
//     through FP; every routine is a NOSPLIT leaf with a zero-size frame.
//   - R0 is used as a constant zero. Most routines re-zero it explicitly;
//     shlVU, shrVU and bitLen read it without doing so.
//     NOTE(review): those routines rely on R0 == 0 at entry - confirm this
//     is guaranteed by the toolchain's register conventions.
//   - MULHDU Rx, Ry is used with the expectation that the high word of the
//     product ends up in Ry (and R10) and the low word in R11; mulWW,
//     mulAddVWW and addMulVVW all depend on this.
//   - Vector indices are kept as byte offsets (i*8) because the indexed
//     addressing mode is used with a scale of 1.

// func mulWW(x, y Word) (z1, z0 Word)
// Returns the 128-bit product x*y as z1 (high word) and z0 (low word).
TEXT ·mulWW(SB),NOSPLIT,$0
	MOVD	x+0(FP), R3
	MOVD	y+8(FP), R4
	MULHDU	R3, R4			// product is read back from R10 (high) and R11 (low)
	MOVD	R10, z1+16(FP)
	MOVD	R11, z0+24(FP)
	RET

// func divWW(x1, x0, y Word) (q, r Word)
// Divides the 128-bit value x1:x0 (held in the R10:R11 pair) by y.
// The quotient is read back from R11 and the remainder from R10.
TEXT ·divWW(SB),NOSPLIT,$0
	MOVD	x1+0(FP), R10
	MOVD	x0+8(FP), R11
	MOVD	y+16(FP), R5
	WORD	$0xb98700a5		// dlgr r10,r5
	MOVD	R11, q+24(FP)
	MOVD	R10, r+32(FP)
	RET

// func addVV(z, x, y []Word) (c Word)
// z = x + y over len(z) words; c is the carry out of the top word.
//
// Register roles:
//   R2  = &z[0]          R8 = &x[0]          R9 = &y[0]
//   R3  = words remaining (n)
//   R10 = byte offset of the current word (i*8)
//   R4  = carry saved between iterations as 0 or -1 (negated on exit)
//   R5, R6, R7, R1 = data words        R11 = scratch for words of y
//   R0  = 0
TEXT ·addVV(SB),NOSPLIT,$0
	MOVD	z_len+8(FP), R3
	MOVD	x+24(FP), R8
	MOVD	y+48(FP), R9
	MOVD	z+0(FP), R2

	MOVD	$0, R4			// c = 0
	MOVD	$0, R0			// make sure it's zero
	MOVD	$0, R10			// i = 0

	// s/BLT/BR/ below to disable the unrolled loop
	SUB	$4, R3			// n -= 4
	BLT	v1			// if n < 0 goto v1

U1:	// n >= 0
	// regular loop body unrolled 4x
	MOVD	0(R8)(R10*1), R5
	MOVD	8(R8)(R10*1), R6
	MOVD	16(R8)(R10*1), R7
	MOVD	24(R8)(R10*1), R1
	ADDC	R4, R4			// restore CF: (-1)+(-1) carries out, 0+0 does not
	MOVD	0(R9)(R10*1), R11
	ADDE	R11, R5
	MOVD	8(R9)(R10*1), R11
	ADDE	R11, R6
	MOVD	16(R9)(R10*1), R11
	ADDE	R11, R7
	MOVD	24(R9)(R10*1), R11
	ADDE	R11, R1
	MOVD	R0, R4
	ADDE	R4, R4			// save CF: R4 = 0 + 0 + CF
	NEG	R4, R4			// keep it as 0 / -1 for the next restore
	MOVD	R5, 0(R2)(R10*1)
	MOVD	R6, 8(R2)(R10*1)
	MOVD	R7, 16(R2)(R10*1)
	MOVD	R1, 24(R2)(R10*1)

	ADD	$32, R10		// i += 4 (byte offset += 32)
	SUB	$4, R3			// n -= 4
	BGE	U1			// if n >= 0 goto U1

v1:	ADD	$4, R3			// n += 4
	BLE	E1			// if n <= 0 goto E1

L1:	// n > 0
	ADDC	R4, R4			// restore CF
	MOVD	0(R8)(R10*1), R5
	MOVD	0(R9)(R10*1), R11
	ADDE	R11, R5
	MOVD	R5, 0(R2)(R10*1)
	MOVD	R0, R4
	ADDE	R4, R4			// save CF
	NEG	R4, R4

	ADD	$8, R10			// i++ (byte offset += 8)
	SUB	$1, R3			// n--
	BGT	L1			// if n > 0 goto L1

E1:	NEG	R4, R4			// 0 / -1  ->  0 / 1
	MOVD	R4, c+72(FP)		// return c
	RET

// func subVV(z, x, y []Word) (c Word)
// z = x - y over len(z) words; c is the borrow out of the top word.
// (same as addVV except for SUBC/SUBE instead of ADDC/ADDE; the saved
// borrow in R4 is already 0 / -1 after SUBE, so no NEG is needed in the loops)
//
// Register roles: identical to addVV.
TEXT ·subVV(SB),NOSPLIT,$0
	MOVD	z_len+8(FP), R3
	MOVD	x+24(FP), R8
	MOVD	y+48(FP), R9
	MOVD	z+0(FP), R2

	MOVD	$0, R4			// c = 0
	MOVD	$0, R0			// make sure it's zero
	MOVD	$0, R10			// i = 0

	// s/BLT/BR/ below to disable the unrolled loop
	SUB	$4, R3			// n -= 4
	BLT	v1			// if n < 0 goto v1

U1:	// n >= 0
	// regular loop body unrolled 4x
	MOVD	0(R8)(R10*1), R5
	MOVD	8(R8)(R10*1), R6
	MOVD	16(R8)(R10*1), R7
	MOVD	24(R8)(R10*1), R1
	MOVD	R0, R11
	SUBC	R4, R11			// restore CF: 0 - R4 borrows iff R4 == -1
	MOVD	0(R9)(R10*1), R11
	SUBE	R11, R5
	MOVD	8(R9)(R10*1), R11
	SUBE	R11, R6
	MOVD	16(R9)(R10*1), R11
	SUBE	R11, R7
	MOVD	24(R9)(R10*1), R11
	SUBE	R11, R1
	MOVD	R0, R4
	SUBE	R4, R4			// save CF: R4 = 0 - 0 - borrow
	MOVD	R5, 0(R2)(R10*1)
	MOVD	R6, 8(R2)(R10*1)
	MOVD	R7, 16(R2)(R10*1)
	MOVD	R1, 24(R2)(R10*1)

	ADD	$32, R10		// i += 4 (byte offset += 32)
	SUB	$4, R3			// n -= 4
	BGE	U1			// if n >= 0 goto U1

v1:	ADD	$4, R3			// n += 4
	BLE	E1			// if n <= 0 goto E1

L1:	// n > 0
	MOVD	R0, R11
	SUBC	R4, R11			// restore CF
	MOVD	0(R8)(R10*1), R5
	MOVD	0(R9)(R10*1), R11
	SUBE	R11, R5
	MOVD	R5, 0(R2)(R10*1)
	MOVD	R0, R4
	SUBE	R4, R4			// save CF

	ADD	$8, R10			// i++ (byte offset += 8)
	SUB	$1, R3			// n--
	BGT	L1			// if n > 0 goto L1

E1:	NEG	R4, R4			// 0 / -1  ->  0 / 1
	MOVD	R4, c+72(FP)		// return c
	RET

// func addVW(z, x []Word, y Word) (c Word)
// z = x + y, where y is a single word added at the least significant end;
// c is the carry out of the top word (c = y when len(z) == 0).
//
// Register roles:
//   R2  = &z[0]          R8 = &x[0]
//   R3  = words remaining (n)
//   R10 = byte offset of the current word (i*8)
//   R4  = word to add to the next chunk (y at first, then the carry 0 / 1)
//   R5, R6, R7, R1 = data words
//   R0  = 0 (temporarily receives the carry, then is re-zeroed)
TEXT ·addVW(SB),NOSPLIT,$0
	MOVD	z_len+8(FP), R3
	MOVD	x+24(FP), R8
	MOVD	y+48(FP), R4		// c = y
	MOVD	z+0(FP), R2
	MOVD	$0, R0			// make sure it's 0
	MOVD	$0, R10			// i = 0

	// s/BLT/BR/ below to disable the unrolled loop
	SUB	$4, R3			// n -= 4
	BLT	v4			// if n < 0 (fewer than 4 words) goto v4

U4:	// n >= 0
	// regular loop body unrolled 4x
	MOVD	0(R8)(R10*1), R5
	MOVD	8(R8)(R10*1), R6
	MOVD	16(R8)(R10*1), R7
	MOVD	24(R8)(R10*1), R1
	ADDC	R4, R5
	ADDE	R0, R6
	ADDE	R0, R7
	ADDE	R0, R1
	ADDE	R0, R0			// R0 = 0 + 0 + CF
	MOVD	R0, R4			// save CF
	SUB	R0, R0			// R0 back to zero
	MOVD	R5, 0(R2)(R10*1)
	MOVD	R6, 8(R2)(R10*1)
	MOVD	R7, 16(R2)(R10*1)
	MOVD	R1, 24(R2)(R10*1)

	ADD	$32, R10		// i += 4 (byte offset += 32)
	SUB	$4, R3			// n -= 4
	BGE	U4			// if n >= 0 goto U4

v4:	ADD	$4, R3			// n += 4
	BLE	E4			// if n <= 0 goto E4

L4:	// n > 0
	MOVD	0(R8)(R10*1), R5
	ADDC	R4, R5
	ADDE	R0, R0			// R0 = CF
	MOVD	R0, R4			// save CF
	SUB	R0, R0			// R0 back to zero
	MOVD	R5, 0(R2)(R10*1)

	ADD	$8, R10			// i++ (byte offset += 8)
	SUB	$1, R3			// n--
	BGT	L4			// if n > 0 goto L4

E4:	MOVD	R4, c+56(FP)		// return c

	RET

// func subVW(z, x []Word, y Word) (c Word)
// z = x - y, where y is a single word subtracted at the least significant
// end; c is the borrow out of the top word (c = y when len(z) == 0).
// (same as addVW except for SUBC/SUBE instead of ADDC/ADDE)
//
// Register roles: identical to addVW, except that the borrow is captured
// directly in R4 and R0 is never modified after being zeroed.
TEXT ·subVW(SB),NOSPLIT,$0
	MOVD	z_len+8(FP), R3
	MOVD	x+24(FP), R8
	MOVD	y+48(FP), R4		// c = y
	MOVD	z+0(FP), R2
	MOVD	$0, R0			// make sure it's 0
	MOVD	$0, R10			// i = 0

	// s/BLT/BR/ below to disable the unrolled loop
	SUB	$4, R3			// n -= 4
	BLT	v4			// if n < 0 (fewer than 4 words) goto v4

U4:	// n >= 0
	// regular loop body unrolled 4x
	MOVD	0(R8)(R10*1), R5
	MOVD	8(R8)(R10*1), R6
	MOVD	16(R8)(R10*1), R7
	MOVD	24(R8)(R10*1), R1
	SUBC	R4, R5			// SLGR  -> SUBC
	SUBE	R0, R6			// SLBGR -> SUBE
	SUBE	R0, R7
	SUBE	R0, R1
	SUBE	R4, R4			// save CF: R4 = R4 - R4 - borrow = 0 / -1
	NEG	R4, R4			// 0 / -1  ->  0 / 1
	MOVD	R5, 0(R2)(R10*1)
	MOVD	R6, 8(R2)(R10*1)
	MOVD	R7, 16(R2)(R10*1)
	MOVD	R1, 24(R2)(R10*1)

	ADD	$32, R10		// i += 4 (byte offset += 32)
	SUB	$4, R3			// n -= 4
	BGE	U4			// if n >= 0 goto U4

v4:	ADD	$4, R3			// n += 4
	BLE	E4			// if n <= 0 goto E4

L4:	// n > 0
	MOVD	0(R8)(R10*1), R5
	SUBC	R4, R5
	SUBE	R4, R4			// save CF
	NEG	R4, R4
	MOVD	R5, 0(R2)(R10*1)

	ADD	$8, R10			// i++ (byte offset += 8)
	SUB	$1, R3			// n--
	BGT	L4			// if n > 0 goto L4

E4:	MOVD	R4, c+56(FP)		// return c

	RET

// func shlVU(z, x []Word, s uint) (c Word)
// z = x << s, walking from the most significant word down to word 0;
// c receives the bits shifted out of x[len(z)-1]. Returns c = 0 when
// len(z) == 0.
//
// s == 0 and s == 64 take dedicated paths (Z80 / Z864).
// NOTE(review): presumably because the register-count shifts only honour
// the low six bits of the count, so a shift by 64 (s or 64-s) would act as
// a shift by 0 - confirm against the SLLG/SRLG definitions.
//
// Register roles:
//   R2 = &z[0]           R8 = &x[0]
//   R5 = (n-1)*8, then the byte offset of the word being produced
//   R4 = s               R7 = 64 - s
//   R10 = w1             R3 = w / result word       R6 = scratch
//   R1 = byte offset used by the s == 0 copy loop
//   R0 = 0 (not re-zeroed here)
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVD	z_len+8(FP), R5
	SUB	$1, R5			// n--
	BLT	X8b			// n < 0 (original n <= 0)

	// n > 0
	MOVD	s+48(FP), R4
	CMPBEQ	R0, R4, Z80		// handle s == 0
	MOVD	$64, R6
	CMPBEQ	R6, R4, Z864		// handle s == 64
	MOVD	z+0(FP), R2
	MOVD	x+24(FP), R8
	SLD	$3, R5			// R5 = (n-1)*8
	SUB	R4, R6, R7		// R7 = 64 - s
	MOVD	(R8)(R5*1), R10		// w1 = x[n-1]
	SRD	R7, R10, R3
	MOVD	R3, c+56(FP)		// c = w1 >> (64-s)

	BR	E8

	// i > 0
L8:	MOVD	R10, R3			// w = w1
	MOVD	-8(R8)(R5*1), R10	// w1 = x[i-1]

	SLD	R4, R3			// w<<s | w1>>(64-s)
	SRD	R7, R10, R6
	OR	R6, R3
	MOVD	R3, (R2)(R5*1)		// z[i] = w<<s | w1>>(64-s)
	SUB	$8, R5			// i--

E8:	CMPBGT	R5, R0, L8		// i > 0

	// i == 0
X8a:	SLD	R4, R10			// w1<<s
	MOVD	R10, (R2)		// z[0] = w1<<s
	RET

X8b:	MOVD	R0, c+56(FP)		// empty vector: c = 0
	RET

	// s == 0: plain copy of x into z (ascending), c = 0
Z80:	MOVD	z+0(FP), R2
	MOVD	x+24(FP), R8
	SLD	$3, R5			// R5 = (n-1)*8

	MOVD	(R8), R10		// w1 = x[0]
	MOVD	$0, R3
	MOVD	R3, c+56(FP)		// c = 0

	MOVD	$0, R1			// i = 0
	BR	E8Z

	// i < n-1
L8Z:	MOVD	R10, R3			// w = w1
	MOVD	8(R8)(R1*1), R10	// w1 = x[i+1]

	MOVD	R3, (R2)(R1*1)		// z[i] = w
	ADD	$8, R1			// i++

E8Z:	CMPBLT	R1, R5, L8Z		// i < n-1

	// i == n-1
	MOVD	R10, (R2)(R5*1)		// z[n-1] = w1
	RET

	// s == 64: every word moves up one position, c = x[n-1], z[0] = 0
Z864:	MOVD	z+0(FP), R2
	MOVD	x+24(FP), R8
	SLD	$3, R5			// R5 = (n-1)*8
	MOVD	(R8)(R5*1), R3		// w1 = x[n-1]
	MOVD	R3, c+56(FP)		// c = x[n-1]

	BR	E864

	// i > 0
L864:	MOVD	-8(R8)(R5*1), R3

	MOVD	R3, (R2)(R5*1)		// z[i] = x[i-1]
	SUB	$8, R5			// i--

E864:	CMPBGT	R5, R0, L864		// i > 0

	MOVD	R0, (R2)		// z[0] = 0
	RET

// func shrVU(z, x []Word, s uint) (c Word)
// z = x >> s, walking from word 0 up to the most significant word;
// c receives the bits shifted out of x[0]. Returns c = 0 when len(z) == 0.
//
// s == 0 and s == 64 take dedicated paths (ZB0 / ZB64), for the same
// (presumed) reason as in shlVU.
//
// Register roles:
//   R2 = &z[0]           R8 = &x[0]
//   R5 = (n-1)*8         R1 = byte offset of the word being produced (i*8)
//   R4 = s               R7 = 64 - s
//   R10 = w1             R3 = w / result word       R6 = scratch
//   R0 = 0 (not re-zeroed here)
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVD	z_len+8(FP), R5
	SUB	$1, R5			// n--
	BLT	X9b			// n < 0 (original n <= 0)

	// n > 0
	MOVD	s+48(FP), R4
	CMPBEQ	R0, R4, ZB0		// handle s == 0
	MOVD	$64, R6
	CMPBEQ	R6, R4, ZB64		// handle s == 64
	MOVD	z+0(FP), R2
	MOVD	x+24(FP), R8
	SLD	$3, R5			// R5 = (n-1)*8
	SUB	R4, R6, R7		// R7 = 64 - s
	MOVD	(R8), R10		// w1 = x[0]
	SLD	R7, R10, R3
	MOVD	R3, c+56(FP)		// c = w1 << (64-s)

	MOVD	$0, R1			// i = 0
	BR	E9

	// i < n-1
L9:	MOVD	R10, R3			// w = w1
	MOVD	8(R8)(R1*1), R10	// w1 = x[i+1]

	SRD	R4, R3			// w>>s | w1<<(64-s)
	SLD	R7, R10, R6
	OR	R6, R3
	MOVD	R3, (R2)(R1*1)		// z[i] = w>>s | w1<<(64-s)
	ADD	$8, R1			// i++

E9:	CMPBLT	R1, R5, L9		// i < n-1

	// i == n-1
X9a:	SRD	R4, R10			// w1>>s
	MOVD	R10, (R2)(R5*1)		// z[n-1] = w1>>s
	RET

X9b:	MOVD	R0, c+56(FP)		// empty vector: c = 0
	RET

	// s == 0: plain copy of x into z (ascending), c = 0
ZB0:	MOVD	z+0(FP), R2
	MOVD	x+24(FP), R8
	SLD	$3, R5			// R5 = (n-1)*8

	MOVD	(R8), R10		// w1 = x[0]
	MOVD	$0, R3
	MOVD	R3, c+56(FP)		// c = 0

	MOVD	$0, R1			// i = 0
	BR	E9Z

	// i < n-1
L9Z:	MOVD	R10, R3			// w = w1
	MOVD	8(R8)(R1*1), R10	// w1 = x[i+1]

	MOVD	R3, (R2)(R1*1)		// z[i] = w
	ADD	$8, R1			// i++

E9Z:	CMPBLT	R1, R5, L9Z		// i < n-1

	// i == n-1
	MOVD	R10, (R2)(R5*1)		// z[n-1] = w1
	RET

	// s == 64: every word moves down one position, c = x[0], z[n-1] = 0
ZB64:	MOVD	z+0(FP), R2
	MOVD	x+24(FP), R8
	SLD	$3, R5			// R5 = (n-1)*8
	MOVD	(R8), R3		// w1 = x[0]
	MOVD	R3, c+56(FP)		// c = x[0]

	MOVD	$0, R1			// i = 0
	BR	E964

	// i < n-1
L964:	MOVD	8(R8)(R1*1), R3		// w1 = x[i+1]

	MOVD	R3, (R2)(R1*1)		// z[i] = x[i+1]
	ADD	$8, R1			// i++

E964:	CMPBLT	R1, R5, L964		// i < n-1

	// i == n-1
	MOVD	$0, R10
	MOVD	R10, (R2)(R5*1)		// z[n-1] = 0
	RET

// func mulAddVWW(z, x []Word, y, r Word) (c Word)
// z = x*y + r over len(z) words; c is the final carry word
// (c = r when len(z) == 0).
//
// Register roles:
//   R2 = &z[0]           R8 = &x[0]          R9 = y
//   R5 = n               R7 = i              R1 = i*8 (byte offset)
//   R4 = running carry word c
//   R6 = x[i], then the high word of x[i]*y
//   R11 = low word of x[i]*y (left there by MULHDU)
//   R0 = 0
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVD	z+0(FP), R2
	MOVD	x+24(FP), R8
	MOVD	y+48(FP), R9
	MOVD	r+56(FP), R4		// c = r
	MOVD	z_len+8(FP), R5
	MOVD	$0, R1			// i*8 = 0
	MOVD	$0, R7			// i = 0
	MOVD	$0, R0			// make sure it's zero
	BR	E5

L5:	MOVD	(R8)(R1*1), R6
	MULHDU	R9, R6			// R6:R11 = x[i] * y
	ADDC	R4, R11			// add c to the low order bits
	ADDE	R0, R6			// propagate the carry into the high word
	MOVD	R11, (R2)(R1*1)		// z[i] = low word
	MOVD	R6, R4			// c = high word
	ADD	$8, R1			// i*8 + 8
	ADD	$1, R7			// i++

E5:	CMPBLT	R7, R5, L5		// i < n

	MOVD	R4, c+64(FP)
	RET

// func addMulVVW(z, x []Word, y Word) (c Word)
// z += x*y over len(z) words; c is the final carry word.
// Words are processed two per iteration (A6) while at least two remain
// below n&^1, then singly (L6).
//
// Register roles:
//   R2 = &z[0]           R8 = &x[0]          R9 = y
//   R5 = n               R12 = n with the low bit cleared
//   R7 = i               R1 = i*8 (byte offset)
//   R4 = running carry word c
//   R6 = x[i], then the high word of x[i]*y
//   R11 = low word of x[i]*y (left there by MULHDU)
//   R10 = z[i]
//   R0 = 0
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVD	z+0(FP), R2
	MOVD	x+24(FP), R8
	MOVD	y+48(FP), R9
	MOVD	z_len+8(FP), R5

	MOVD	$0, R1			// i*8 = 0
	MOVD	$0, R7			// i = 0
	MOVD	$0, R0			// make sure it's zero
	MOVD	$0, R4			// c = 0

	MOVD	R5, R12
	AND	$-2, R12		// R12 = n rounded down to an even count
	CMPBGE	R5, $2, A6		// at least two words: use the 2x unrolled loop
	BR	E6

A6:	MOVD	(R8)(R1*1), R6
	MULHDU	R9, R6			// R6:R11 = x[i] * y
	MOVD	(R2)(R1*1), R10
	ADDC	R10, R11		// add z[i] to the low order bits
	ADDE	R0, R6
	ADDC	R4, R11			// add c to the low order bits
	ADDE	R0, R6
	MOVD	R6, R4			// c = high word
	MOVD	R11, (R2)(R1*1)		// z[i] = low word

	MOVD	(8)(R8)(R1*1), R6
	MULHDU	R9, R6			// R6:R11 = x[i+1] * y
	MOVD	(8)(R2)(R1*1), R10
	ADDC	R10, R11		// add z[i+1] to the low order bits
	ADDE	R0, R6
	ADDC	R4, R11			// add c to the low order bits
	ADDE	R0, R6
	MOVD	R6, R4			// c = high word
	MOVD	R11, (8)(R2)(R1*1)	// z[i+1] = low word

	ADD	$16, R1			// i*8 + 16
	ADD	$2, R7			// i += 2

	CMPBLT	R7, R12, A6
	BR	E6

L6:	MOVD	(R8)(R1*1), R6
	MULHDU	R9, R6			// R6:R11 = x[i] * y
	MOVD	(R2)(R1*1), R10
	ADDC	R10, R11		// add z[i] to the low order bits
	ADDE	R0, R6
	ADDC	R4, R11			// add c to the low order bits
	ADDE	R0, R6
	MOVD	R6, R4			// c = high word
	MOVD	R11, (R2)(R1*1)		// z[i] = low word

	ADD	$8, R1			// i*8 + 8
	ADD	$1, R7			// i++

E6:	CMPBLT	R7, R5, L6		// i < n

	MOVD	R4, c+56(FP)
	RET

// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
// Divides the multi-word value xn:x by the single word y, from the most
// significant word down; quotient words go to z, the final remainder to r
// (r = xn when len(z) == 0).
//
// Register roles:
//   R2 = &z[0]           R8 = &x[0]          R9 = y
//   R7 = i (counts down from len(z))         R1 = i*8 (byte offset)
//   R10 = running remainder (high word of the dividend pair R10:R11)
//   R11 = x[i] going in, quotient word coming out
//   R0 = 0
TEXT ·divWVW(SB),NOSPLIT,$0
	MOVD	z+0(FP), R2
	MOVD	xn+24(FP), R10		// r = xn
	MOVD	x+32(FP), R8
	MOVD	y+56(FP), R9
	MOVD	z_len+8(FP), R7		// i = len(z)
	SLD	$3, R7, R1		// i*8
	MOVD	$0, R0			// make sure it's zero
	BR	E7

L7:	MOVD	(R8)(R1*1), R11
	WORD	$0xB98700A9		// DLGR R10,R9
	MOVD	R11, (R2)(R1*1)		// z[i] = quotient word

E7:	SUB	$1, R7			// i--
	SUB	$8, R1			// the branch below tests the result of this SUB
	BGE	L7			// i >= 0

	MOVD	R10, r+64(FP)
	RET

// func bitLen(x Word) (n int)
// Returns 64 minus the value FLOGR leaves in R2 for x.
// NOTE(review): FLOGR is expected to yield the number of leading zero bits
// (64 for x == 0), making n the bit length of x - confirm against the
// instruction definition.
TEXT ·bitLen(SB),NOSPLIT,$0
	MOVD	x+0(FP), R2
	WORD	$0xb9830022		// FLOGR R2,R2
	MOVD	$64, R3
	SUB	R2, R3			// R3 = 64 - R2
	MOVD	R3, n+8(FP)
	RET