dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/pa64/submul_1.asm

dnl  HP-PA 2.0 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
dnl  subtract the result from a second limb vector.

dnl  Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		    cycles/limb
C 8000,8200:		7
C 8500,8600,8700:	6.5

C  The feed-in and wind-down code has not yet been scheduled.  Many cycles
C  could be saved there per call.

C  DESCRIPTION:
C  The main loop "BIG" is 4-way unrolled, mainly to allow
C  effective use of ADD,DC.  Delays in moving data via the cache from the FP
C  registers to the IU registers have demanded a deep software pipeline, and
C  a lot of stack slots for partial products in flight.
C
C  CODE STRUCTURE:
C    save-some-registers
C    do 0, 1, 2, or 3 limbs
C    if done, restore-some-regs and return
C    save-many-regs
C    do 4, 8, ... limbs
C    restore-all-regs

C  STACK LAYOUT:
C  HP-PA stack grows upwards.  We could allocate 8 fewer slots by using the
C  slots marked FREE, as well as some slots in the caller's "frame marker".
C
C -00 <- r30
C -08  FREE
C -10  tmp
C -18  tmp
C -20  tmp
C -28  tmp
C -30  tmp
C -38  tmp
C -40  tmp
C -48  tmp
C -50  tmp
C -58  tmp
C -60  tmp
C -68  tmp
C -70  tmp
C -78  tmp
C -80  tmp
C -88  tmp
C -90  FREE
C -98  FREE
C -a0  FREE
C -a8  FREE
C -b0  r13
C -b8  r12
C -c0  r11
C -c8  r10
C -d0  r9
C -d8  r8
C -e0  r7
C -e8  r6
C -f0  r5
C -f8  r4
C -100 r3
C  Previous frame:
C  [unused area]
C  -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.


include(`../config.m4')

C  INTERFACE:
C  mpn_submul_1 multiplies the limb vector at up by the single limb vlimb and
C  subtracts the product, limb by limb, from the vector at rp, writing the
C  result back to rp.  n is the limb count.  The value returned is the final
C  carry limb (climb): the high part of the last product plus the borrow out
C  of the last subtraction.
C
C  A zero count is not special-cased.  With n = 0 the code falls into the
C  4-way unrolled part and processes limbs anyway, so callers must pass
C  n >= 1.
C
C  %r3-%r5 are saved on entry.  %r6-%r13 are saved only when the 4-way
C  unrolled part is entered, and restored only on that path.

C INPUT PARAMETERS:
define(`rp',`%r26')	C vector that is read, reduced by the product, written
define(`up',`%r25')	C vector that is multiplied by vlimb (read only)
define(`n',`%r24')	C limb count
define(`vlimb',`%r23')	C multiplier limb

C  climb is the running carry limb.  It shares %r23 with vlimb: the integer
C  copy of vlimb is dead once it has been stored to its home slot (2.0w) and
C  the multiplier has been loaded into %fr8 from that slot.
define(`climb',`%r23')	C

ifdef(`HAVE_ABI_2_0w',
`	.level	2.0w
',`	.level	2.0
')
PROLOGUE(mpn_submul_1)

ifdef(`HAVE_ABI_2_0w',
`	std		vlimb, -0x38(%r30)	C store vlimb into "home" slot
')
C  Store %r3 at the old %r30 and then advance %r30 by 0x100, which allocates
C  the frame described under STACK LAYOUT.  The home slot that was at -0x38
C  is therefore addressed as -0x138 from here on.
	std,ma		%r3, 0x100(%r30)
	std		%r4, -0xf8(%r30)
	std		%r5, -0xf0(%r30)
	ldo		0(%r0), climb		C clear climb
	fldd		-0x138(%r30), %fr8	C put vlimb in fp register

C  Register names for the 0..3 limb feed-in code.  The digits in a name give
C  the bit weight of the value within the 128-bit product of one limb:
C    p000a       low product   (low  half of vlimb times low  half of ulimb)
C    p032a1/a2   mid products  (the two cross products)
C    p064a       high product  (high half of vlimb times high half of ulimb)
C    m032/m096   sum of the two mid products and its carry out
C    ma000/ma064 the mid sum realigned to limb boundaries
C    s000        product limb being accumulated, r000 the limb read from rp
C  m032 and ma064 share %r20; m032 is fully consumed before ma064 is complete.
define(`p032a1',`%r1')	C
define(`p032a2',`%r19')	C

define(`m032',`%r20')	C
define(`m096',`%r21')	C

define(`p000a',`%r22')	C
define(`p064a',`%r29')	C

define(`s000',`%r31')	C

define(`ma000',`%r4')	C
define(`ma064',`%r20')	C

define(`r000',`%r3')	C

C  %r5 = low two bits of n = number of limbs to handle before the unrolled
C  code.  If that is zero go directly to BIG.
	extrd,u		n, 63, 2, %r5
	cmpb,=		%r5, %r0, L(BIG)
	nop					C branch delay slot

C  First odd limb: form the four 32x32 partial products and park them in
C  stack slots so they can be reloaded into integer registers.
	fldd		0(up), %fr4
	ldo		8(up), up
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr4R, %fr24
	xmpyu		%fr8L, %fr4L, %fr25
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	addib,<>	-1, %r5, L(two_or_more)
	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
C  Exactly one odd limb: reload its products and go to the final wind-down
C  step.  The last load sits in the delay slot of the branch.
LDEF(one)
	ldd		-0x78(%r30), p032a1
	ldd		-0x70(%r30), p032a2
	ldd		-0x80(%r30), p000a
	b		L(0_one_out)
	ldd		-0x68(%r30), p064a

C  Second odd limb: each slot is read (products of the previous limb) just
C  before it is overwritten with the products of the new limb.
LDEF(two_or_more)
	fldd		0(up), %fr4
	ldo		8(up), up
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	ldd		-0x78(%r30), p032a1
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr4R, %fr24
	xmpyu		%fr8L, %fr4L, %fr25
	ldd		-0x70(%r30), p032a2
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	ldd		-0x80(%r30), p000a
	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	ldd		-0x68(%r30), p064a
	addib,<>	-1, %r5, L(three_or_more)
	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
C  Exactly two odd limbs.  Combine the mid products of the first limb:
C    m032  = sum of the two mid products, m096 = carry out of that sum
C    ma000 = m032 shifted left 32 (zero filled)
C    ma064 = m032 shifted right 32, with the carry deposited above it
LDEF(two)
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
	b		L(0_two_out)
	depd		m096, 31, 32, ma064

C  Three odd limbs: start loading the third limb, combine the mid products
C  of the first one, then fall through LDEF(loop0) into 0_out.  The loop
C  body below is commented out in the source; only the label remains.
LDEF(three_or_more)
	fldd		0(up), %fr4
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
C	addib,=		-1, %r5, L(0_out)
	depd		m096, 31, 32, ma064
LDEF(loop0)
C	xmpyu		%fr8R, %fr4L, %fr22
C	xmpyu		%fr8L, %fr4R, %fr23
C	ldd		-0x78(%r30), p032a1
C	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
C
C	xmpyu		%fr8R, %fr4R, %fr24
C	xmpyu		%fr8L, %fr4L, %fr25
C	ldd		-0x70(%r30), p032a2
C	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
C
C	ldo		8(rp), rp
C	add		climb, p000a, s000
C	ldd		-0x80(%r30), p000a
C	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
C
C	add,dc		p064a, %r0, climb
C	ldo		8(up), up
C	ldd		-0x68(%r30), p064a
C	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
C
C	add		ma000, s000, s000
C	add,dc		ma064, climb, climb
C	fldd		0(up), %fr4
C
C	sub		r000, s000, s000
C	sub,db		%r0, climb, climb
C	sub		%r0, climb, climb
C	std		s000, -8(rp)
C
C	add		p032a1, p032a2, m032
C	add,dc		%r0, %r0, m096
C
C	depd,z		m032, 31, 32, ma000
C	extrd,u		m032, 31, 32, ma064
C	ldd		0(rp), r000
C	addib,<>	-1, %r5, L(loop0)
C	depd		m096, 31, 32, ma064
C  Wind-down with three limbs in flight: multiply the third limb while the
C  first limb is completed and stored.  rp has already been advanced, hence
C  the store to -8(rp).
C
C  Carry handling used for every limb in this file:
C    sub     r, s, s          result limb = rp limb - product limb
C    sub,db  %r0, climb, climb   climb = 0 - climb - borrow
C    sub     %r0, climb, climb   climb = climb + borrow
LDEF(0_out)
	ldo		8(up), up
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	ldd		-0x78(%r30), p032a1
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr4R, %fr24
	xmpyu		%fr8L, %fr4L, %fr25
	ldd		-0x70(%r30), p032a2
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	ldo		8(rp), rp
	add		climb, p000a, s000
	ldd		-0x80(%r30), p000a
	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	add,dc		p064a, %r0, climb
	ldd		-0x68(%r30), p064a
	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
	add		ma000, s000, s000
	add,dc		ma064, climb, climb
	sub		r000, s000, s000
	sub,db		%r0, climb, climb
	sub		%r0, climb, climb
	std		s000, -8(rp)
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
	depd		m096, 31, 32, ma064
C  Wind-down with two limbs in flight: reload the products of the last limb
C  and complete and store the limb before it.
LDEF(0_two_out)
	ldd		-0x78(%r30), p032a1
	ldd		-0x70(%r30), p032a2
	ldo		8(rp), rp
	add		climb, p000a, s000
	ldd		-0x80(%r30), p000a
	add,dc		p064a, %r0, climb
	ldd		-0x68(%r30), p064a
	add		ma000, s000, s000
	add,dc		ma064, climb, climb
	sub		r000, s000, s000
	sub,db		%r0, climb, climb
	sub		%r0, climb, climb
	std		s000, -8(rp)
C  Wind-down for the last odd limb; rp has not been advanced for it yet, so
C  it is stored at 0(rp).
LDEF(0_one_out)
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
	depd		m096, 31, 32, ma064

	add		climb, p000a, s000
	add,dc		p064a, %r0, climb
	add		ma000, s000, s000
	add,dc		ma064, climb, climb
	sub		r000, s000, s000
	sub,db		%r0, climb, climb
	sub		%r0, climb, climb
	std		s000, 0(rp)

C  This point is only reached with n mod 4 != 0, so 4 >= n means all limbs
C  are done.  The rp update in the delay slot is executed either way.
	cmpib,>=	4, n, L(done)
	ldo		8(rp), rp

C 4-way unrolled code.

LDEF(BIG)

C  The register names are redefined for the unrolled code.  Suffixes a..d
C  denote the four limbs of a group; the digits again give the bit weight.
C  Several names map to the same register (for instance p032a1, p000a and
C  r000 are all %r1); the instruction order below ensures each value is
C  consumed before the register is reloaded under another name.
define(`p032a1',`%r1')	C
define(`p032a2',`%r19')	C
define(`p096b1',`%r20')	C
define(`p096b2',`%r21')	C
define(`p160c1',`%r22')	C
define(`p160c2',`%r29')	C
define(`p224d1',`%r31')	C
define(`p224d2',`%r3')	C
C
define(`m032',`%r4')	C
define(`m096',`%r5')	C
define(`m160',`%r6')	C
define(`m224',`%r7')	C
define(`m288',`%r8')	C
C
define(`p000a',`%r1')	C
define(`p064a',`%r19')	C
define(`p064b',`%r20')	C
define(`p128b',`%r21')	C
define(`p128c',`%r22')	C
define(`p192c',`%r29')	C
define(`p192d',`%r31')	C
define(`p256d',`%r3')	C
C
define(`s000',`%r10')	C
define(`s064',`%r11')	C
define(`s128',`%r12')	C
define(`s192',`%r13')	C
C
define(`ma000',`%r9')	C
define(`ma064',`%r4')	C
define(`ma128',`%r5')	C
define(`ma192',`%r6')	C
define(`ma256',`%r7')	C
C
define(`r000',`%r1')	C
define(`r064',`%r19')	C
define(`r128',`%r20')	C
define(`r192',`%r21')	C

C  Save the additional registers used by the unrolled code.
	std		%r6, -0xe8(%r30)
	std		%r7, -0xe0(%r30)
	std		%r8, -0xd8(%r30)
	std		%r9, -0xd0(%r30)
	std		%r10, -0xc8(%r30)
	std		%r11, -0xc0(%r30)
	std		%r12, -0xb8(%r30)
	std		%r13, -0xb0(%r30)

C  n becomes the number of 4-limb groups.
ifdef(`HAVE_ABI_2_0w',
`	extrd,u		n, 61, 62, n		C right shift 2
',`	extrd,u		n, 61, 30, n		C right shift 2, zero extend
')

C  Feed-in: load the first group of four limbs and form its 16 partial
C  products.  %fr22-%fr27 are reused for the low/high products once the mid
C  products they held have been stored.
LDEF(4_or_more)
	fldd		0(up), %fr4
	fldd		8(up), %fr5
	fldd		16(up), %fr6
	fldd		24(up), %fr7
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	xmpyu		%fr8R, %fr5L, %fr24
	xmpyu		%fr8L, %fr5R, %fr25
	xmpyu		%fr8R, %fr6L, %fr26
	xmpyu		%fr8L, %fr6R, %fr27
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr7L, %fr28
	xmpyu		%fr8L, %fr7R, %fr29
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	xmpyu		%fr8R, %fr4R, %fr30
	xmpyu		%fr8L, %fr4L, %fr31
	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31
	xmpyu		%fr8R, %fr5R, %fr22
	xmpyu		%fr8L, %fr5L, %fr23
	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29
	xmpyu		%fr8R, %fr6R, %fr24
	xmpyu		%fr8L, %fr6L, %fr25
	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51
	xmpyu		%fr8R, %fr7R, %fr26
	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
	addib,<>	-1, n, L(8_or_more)
	xmpyu		%fr8L, %fr7L, %fr27
C  Only one group: store the remaining products, reload the mid products
C  and finish in end1.
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81
	ldd		-0x78(%r30), p032a1
	ldd		-0x70(%r30), p032a2
	ldd		-0x38(%r30), p096b1
	ldd		-0x30(%r30), p096b2
	ldd		-0x58(%r30), p160c1
	ldd		-0x50(%r30), p160c2
	ldd		-0x18(%r30), p224d1
	ldd		-0x10(%r30), p224d2
	b		L(end1)
	nop					C branch delay slot

C  Two or more groups: finish storing the products of the first group,
C  advance up, and start multiplying the second group while the mid products
C  of the first group are reloaded into integer registers.
LDEF(8_or_more)
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	ldo		32(up), up
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81
	fldd		0(up), %fr4
	fldd		8(up), %fr5
	fldd		16(up), %fr6
	fldd		24(up), %fr7
	xmpyu		%fr8R, %fr4L, %fr22
	ldd		-0x78(%r30), p032a1
	xmpyu		%fr8L, %fr4R, %fr23
	xmpyu		%fr8R, %fr5L, %fr24
	ldd		-0x70(%r30), p032a2
	xmpyu		%fr8L, %fr5R, %fr25
	xmpyu		%fr8R, %fr6L, %fr26
	ldd		-0x38(%r30), p096b1
	xmpyu		%fr8L, %fr6R, %fr27
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr7L, %fr28
	ldd		-0x30(%r30), p096b2
	xmpyu		%fr8L, %fr7R, %fr29
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	xmpyu		%fr8R, %fr4R, %fr30
	ldd		-0x58(%r30), p160c1
	xmpyu		%fr8L, %fr4L, %fr31
	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31
	xmpyu		%fr8R, %fr5R, %fr22
	ldd		-0x50(%r30), p160c2
	xmpyu		%fr8L, %fr5L, %fr23
	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29
	xmpyu		%fr8R, %fr6R, %fr24
	ldd		-0x18(%r30), p224d1
	xmpyu		%fr8L, %fr6L, %fr25
	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51
	xmpyu		%fr8R, %fr7R, %fr26
	ldd		-0x10(%r30), p224d2
	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
	addib,=		-1, n, L(end2)
	xmpyu		%fr8L, %fr7L, %fr27
C  Main loop, one group of four limbs per iteration.  Integer work on the
C  group whose products are already in memory is interleaved with storing
C  the products of the next group and with loading and multiplying the group
C  after that.  The add,dc and sub,db sequences form carry/borrow chains;
C  the loads and stores placed between them are expected to leave the carry
C  untouched, so the instruction order must be kept.
LDEF(loop)
	add		p032a1, p032a2, m032
	ldd		-0x80(%r30), p000a
	add,dc		p096b1, p096b2, m096
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11

	add,dc		p160c1, p160c2, m160
	ldd		-0x68(%r30), p064a
	add,dc		p224d1, p224d2, m224
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09

	add,dc		%r0, %r0, m288
	ldd		-0x40(%r30), p064b
	ldo		32(up), up
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79

	depd,z		m032, 31, 32, ma000
	ldd		-0x28(%r30), p128b
	extrd,u		m032, 31, 32, ma064
	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61

	depd		m096, 31, 32, ma064
	ldd		-0x60(%r30), p128c
	extrd,u		m096, 31, 32, ma128
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39

	depd		m160, 31, 32, ma128
	ldd		-0x48(%r30), p192c
	extrd,u		m160, 31, 32, ma192
	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21

	depd		m224, 31, 32, ma192
	ldd		-0x20(%r30), p192d
	extrd,u		m224, 31, 32, ma256
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59

	depd		m288, 31, 32, ma256
	ldd		-0x88(%r30), p256d
	add		climb, p000a, s000
	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41

	add,dc		p064a, p064b, s064
	ldd		0(rp), r000
	add,dc		p128b, p128c, s128
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19

	add,dc		p192c, p192d, s192
	ldd		8(rp), r064
	add,dc		p256d, %r0, climb
	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81

	ldd		16(rp), r128
	add		ma000, s000, s000	C accum mid 0
	ldd		24(rp), r192
	add,dc		ma064, s064, s064	C accum mid 1

	add,dc		ma128, s128, s128	C accum mid 2
	fldd		0(up), %fr4
	add,dc		ma192, s192, s192	C accum mid 3
	fldd		8(up), %fr5

	add,dc		ma256, climb, climb
	fldd		16(up), %fr6
	sub		r000, s000, s000	C accum rlimb 0
	fldd		24(up), %fr7

	sub,db		r064, s064, s064	C accum rlimb 1
	sub,db		r128, s128, s128	C accum rlimb 2
	std		s000, 0(rp)

	sub,db		r192, s192, s192	C accum rlimb 3
	sub,db		%r0, climb, climb
	sub		%r0, climb, climb
	std		s064, 8(rp)

	xmpyu		%fr8R, %fr4L, %fr22
	ldd		-0x78(%r30), p032a1
	xmpyu		%fr8L, %fr4R, %fr23
	std		s128, 16(rp)

	xmpyu		%fr8R, %fr5L, %fr24
	ldd		-0x70(%r30), p032a2
	xmpyu		%fr8L, %fr5R, %fr25
	std		s192, 24(rp)

	xmpyu		%fr8R, %fr6L, %fr26
	ldd		-0x38(%r30), p096b1
	xmpyu		%fr8L, %fr6R, %fr27
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71

	xmpyu		%fr8R, %fr7L, %fr28
	ldd		-0x30(%r30), p096b2
	xmpyu		%fr8L, %fr7R, %fr29
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69

	xmpyu		%fr8R, %fr4R, %fr30
	ldd		-0x58(%r30), p160c1
	xmpyu		%fr8L, %fr4L, %fr31
	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31

	xmpyu		%fr8R, %fr5R, %fr22
	ldd		-0x50(%r30), p160c2
	xmpyu		%fr8L, %fr5L, %fr23
	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29

	xmpyu		%fr8R, %fr6R, %fr24
	ldd		-0x18(%r30), p224d1
	xmpyu		%fr8L, %fr6L, %fr25
	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51

	xmpyu		%fr8R, %fr7R, %fr26
	ldd		-0x10(%r30), p224d2
	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
	xmpyu		%fr8L, %fr7L, %fr27

	addib,<>	-1, n, L(loop)
	ldo		32(rp), rp

C  Wind-down, two groups in flight: same integer work as the loop body for
C  the older group, storing the products of the last group, but without
C  loading or multiplying any further limbs.
LDEF(end2)
	add		p032a1, p032a2, m032
	ldd		-0x80(%r30), p000a
	add,dc		p096b1, p096b2, m096
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	add,dc		p160c1, p160c2, m160
	ldd		-0x68(%r30), p064a
	add,dc		p224d1, p224d2, m224
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	add,dc		%r0, %r0, m288
	ldd		-0x40(%r30), p064b
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	depd,z		m032, 31, 32, ma000
	ldd		-0x28(%r30), p128b
	extrd,u		m032, 31, 32, ma064
	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61
	depd		m096, 31, 32, ma064
	ldd		-0x60(%r30), p128c
	extrd,u		m096, 31, 32, ma128
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	depd		m160, 31, 32, ma128
	ldd		-0x48(%r30), p192c
	extrd,u		m160, 31, 32, ma192
	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21
	depd		m224, 31, 32, ma192
	ldd		-0x20(%r30), p192d
	extrd,u		m224, 31, 32, ma256
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	depd		m288, 31, 32, ma256
	ldd		-0x88(%r30), p256d
	add		climb, p000a, s000
	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41
	add,dc		p064a, p064b, s064
	ldd		0(rp), r000
	add,dc		p128b, p128c, s128
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	add,dc		p192c, p192d, s192
	ldd		8(rp), r064
	add,dc		p256d, %r0, climb
	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81
	ldd		16(rp), r128
	add		ma000, s000, s000	C accum mid 0
	ldd		24(rp), r192
	add,dc		ma064, s064, s064	C accum mid 1
	add,dc		ma128, s128, s128	C accum mid 2
	add,dc		ma192, s192, s192	C accum mid 3
	add,dc		ma256, climb, climb
	sub		r000, s000, s000	C accum rlimb 0
	sub,db		r064, s064, s064	C accum rlimb 1
	sub,db		r128, s128, s128	C accum rlimb 2
	std		s000, 0(rp)
	sub,db		r192, s192, s192	C accum rlimb 3
	sub,db		%r0, climb, climb
	sub		%r0, climb, climb
	std		s064, 8(rp)
	ldd		-0x78(%r30), p032a1
	std		s128, 16(rp)
	ldd		-0x70(%r30), p032a2
	std		s192, 24(rp)
	ldd		-0x38(%r30), p096b1
	ldd		-0x30(%r30), p096b2
	ldd		-0x58(%r30), p160c1
	ldd		-0x50(%r30), p160c2
	ldd		-0x18(%r30), p224d1
	ldd		-0x10(%r30), p224d2
	ldo		32(rp), rp

C  Wind-down, last group: all products are in memory; combine them,
C  subtract from the rp limbs and store.  rp is not advanced afterwards.
LDEF(end1)
	add		p032a1, p032a2, m032
	ldd		-0x80(%r30), p000a
	add,dc		p096b1, p096b2, m096
	add,dc		p160c1, p160c2, m160
	ldd		-0x68(%r30), p064a
	add,dc		p224d1, p224d2, m224
	add,dc		%r0, %r0, m288
	ldd		-0x40(%r30), p064b
	depd,z		m032, 31, 32, ma000
	ldd		-0x28(%r30), p128b
	extrd,u		m032, 31, 32, ma064
	depd		m096, 31, 32, ma064
	ldd		-0x60(%r30), p128c
	extrd,u		m096, 31, 32, ma128
	depd		m160, 31, 32, ma128
	ldd		-0x48(%r30), p192c
	extrd,u		m160, 31, 32, ma192
	depd		m224, 31, 32, ma192
	ldd		-0x20(%r30), p192d
	extrd,u		m224, 31, 32, ma256
	depd		m288, 31, 32, ma256
	ldd		-0x88(%r30), p256d
	add		climb, p000a, s000
	add,dc		p064a, p064b, s064
	ldd		0(rp), r000
	add,dc		p128b, p128c, s128
	add,dc		p192c, p192d, s192
	ldd		8(rp), r064
	add,dc		p256d, %r0, climb
	ldd		16(rp), r128
	add		ma000, s000, s000	C accum mid 0
	ldd		24(rp), r192
	add,dc		ma064, s064, s064	C accum mid 1
	add,dc		ma128, s128, s128	C accum mid 2
	add,dc		ma192, s192, s192	C accum mid 3
	add,dc		ma256, climb, climb
	sub		r000, s000, s000	C accum rlimb 0
	sub,db		r064, s064, s064	C accum rlimb 1
	sub,db		r128, s128, s128	C accum rlimb 2
	std		s000, 0(rp)
	sub,db		r192, s192, s192	C accum rlimb 3
	sub,db		%r0, climb, climb
	sub		%r0, climb, climb
	std		s064, 8(rp)
	std		s128, 16(rp)
	std		s192, 24(rp)

C  Restore the registers saved at BIG.  The short path jumps straight to
C  done and skips these loads, since it never saved or used %r6-%r13.
	ldd		-0xb0(%r30), %r13
	ldd		-0xb8(%r30), %r12
	ldd		-0xc0(%r30), %r11
	ldd		-0xc8(%r30), %r10
	ldd		-0xd0(%r30), %r9
	ldd		-0xd8(%r30), %r8
	ldd		-0xe0(%r30), %r7
	ldd		-0xe8(%r30), %r6
C  Return the carry limb.  For 2.0w the whole value goes to %r28; otherwise
C  its low 32 bits go to %r29 and its high 32 bits to %r28.
LDEF(done)
ifdef(`HAVE_ABI_2_0w',
`	copy		climb, %r28
',`	extrd,u		climb, 63, 32, %r29
	extrd,u		climb, 31, 32, %r28
')
C  Restore %r5 and %r4, then return.  The load in the delay slot first moves
C  %r30 back by 0x100 (releasing the frame) and reloads %r3 from there.
	ldd		-0xf0(%r30), %r5
	ldd		-0xf8(%r30), %r4
	bve		(%r2)
	ldd,mb		-0x100(%r30), %r3
EPILOGUE(mpn_submul_1)