dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/pa64/addmul_1.asm
dnl  HP-PA 2.0 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
dnl  add the result to a second limb vector.

dnl  Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		    cycles/limb
C 8000,8200:		7
C 8500,8600,8700:	6.375

C  The feed-in and wind-down code has not yet been scheduled.  Many cycles
C  could be saved there per call.

C  DESCRIPTION:
C  The main loop "BIG" is 4-way unrolled, mainly to allow
C  effective use of ADD,DC.  Delays in moving data via the cache from the FP
C  registers to the IU registers, have demanded a deep software pipeline, and
C  a lot of stack slots for partial products in flight.
C
C  FUNCTION:
C    mpn_addmul_1 (rp, up, n, vlimb)
C    Multiplies the n limbs at up by the single limb vlimb, adds the products
C    into the n limbs at rp (read-modify-write), and leaves the final carry
C    limb in the return register(s), see L(done).
C
C  CODE STRUCTURE:
C    save-some-registers
C    do 0, 1, 2, or 3 limbs
C    if done, restore-some-regs and return
C    save-many-regs
C    do 4, 8, ... limbs
C    restore-all-regs

C  STACK LAYOUT:
C  HP-PA stack grows upwards.  We could allocate 8 fewer slots by using the
C  slots marked FREE, as well as some slots in the caller's "frame marker".
C
C   -00 <- r30
C   -08  FREE
C   -10  tmp
C   -18  tmp
C   -20  tmp
C   -28  tmp
C   -30  tmp
C   -38  tmp
C   -40  tmp
C   -48  tmp
C   -50  tmp
C   -58  tmp
C   -60  tmp
C   -68  tmp
C   -70  tmp
C   -78  tmp
C   -80  tmp
C   -88  tmp
C   -90  FREE
C   -98  FREE
C   -a0  FREE
C   -a8  FREE
C   -b0  r13
C   -b8  r12
C   -c0  r11
C   -c8  r10
C   -d0  r9
C   -d8  r8
C   -e0  r7
C   -e8  r6
C   -f0  r5
C   -f8  r4
C   -100 r3
C  Previous frame:
C   [unused area]
C   -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.
C
C  USE OF THE tmp SLOTS (as written by the fstd instructions below).  Each
C  64x64 product is built from four 32x32 xmpyu partial products: one low,
C  two mid and one high.  Limbs a..d are the four limbs of an unrolled group;
C  the 0..3 limb code uses only the limb a slots.
C
C             mid          low     high
C   limb a   -78, -70      -80     -68
C   limb b   -38, -30      -40     -28
C   limb c   -58, -50      -60     -48
C   limb d   -18, -10      -20     -88


C  NOTE(review): config.m4 is also included near the top of this file, so this
C  second include looks redundant -- confirm before removing it.
include(`../config.m4')

C INPUT PARAMETERS:
define(`rp',`%r26')	C limb vector that is read, added to and written back
define(`up',`%r25')	C limb vector that is multiplied by vlimb
define(`n',`%r24')	C limb count
define(`vlimb',`%r23')	C multiplier limb (%r23 only under HAVE_ABI_2_0w)

C  climb shares %r23 with vlimb.  vlimb is stored to its home slot (2.0w) or
C  already lives there (2.0N) before the register is cleared for carry use.
define(`climb',`%r23')	C running carry limb, returned at L(done)

ifdef(`HAVE_ABI_2_0w',
`	.level	2.0w
',`	.level	2.0
')
PROLOGUE(mpn_addmul_1)

ifdef(`HAVE_ABI_2_0w',
`	std	vlimb, -0x38(%r30)	C store vlimb into "home" slot
')
C  Save %r3 and advance %r30 by 0x100; every offset below is relative to the
C  advanced %r30 (so the vlimb home slot is now at -0x138).
	std,ma	%r3, 0x100(%r30)
	std	%r4, -0xf8(%r30)
	std	%r5, -0xf0(%r30)
	ldo	0(%r0), climb		C clear climb
	fldd	-0x138(%r30), %fr8	C put vlimb in fp register

C  Register roles for the 0..3 limb code.  These macros are redefined at
C  L(BIG) for the 4-way unrolled code; m4 applies whichever definition was
C  seen most recently.
define(`p032a1',`%r1')	C first mid partial product
define(`p032a2',`%r19')	C second mid partial product

define(`m032',`%r20')	C sum of the two mid partial products
define(`m096',`%r21')	C carry out of that sum

define(`p000a',`%r22')	C low partial product
define(`p064a',`%r29')	C high partial product

define(`s000',`%r31')	C result limb being accumulated

define(`ma000',`%r4')	C low half of m032 moved to the upper 32 bits
define(`ma064',`%r20')	C upper half of m032, with m096 in its upper 32 bits

define(`r000',`%r3')	C limb loaded from rp

C  %r5 = low two bits of n (n mod 4).  If zero, there are no leading limbs
C  and only the 4-way unrolled code runs.
	extrd,u	n, 63, 2, %r5
	cmpb,=	%r5, %r0, L(BIG)
	nop

C  First leading limb: form its four partial products and spill them to the
C  limb a slots so they can be reloaded into integer registers.
	fldd	0(up), %fr4
	ldo	8(up), up
	xmpyu	%fr8R, %fr4L, %fr22
	xmpyu	%fr8L, %fr4R, %fr23
	fstd	%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu	%fr8R, %fr4R, %fr24
	xmpyu	%fr8L, %fr4L, %fr25
	fstd	%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	fstd	%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	addib,<> -1, %r5, L(two_or_more)
	fstd	%fr25, -0x68(%r30)	C high product to -0x68..-0x61
C  Exactly one leading limb: reload its partial products and finish in the
C  common tail.  The ldd after the branch sits in the branch delay slot.
LDEF(one)
	ldd	-0x78(%r30), p032a1
	ldd	-0x70(%r30), p032a2
	ldd	-0x80(%r30), p000a
	b	L(0_one_out)
	ldd	-0x68(%r30), p064a

C  Second leading limb: each ldd fetches a partial product of the first limb
C  just before the fstd that overwrites the same slot with the second limb.
LDEF(two_or_more)
	fldd	0(up), %fr4
	ldo	8(up), up
	xmpyu	%fr8R, %fr4L, %fr22
	xmpyu	%fr8L, %fr4R, %fr23
	ldd	-0x78(%r30), p032a1
	fstd	%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu	%fr8R, %fr4R, %fr24
	xmpyu	%fr8L, %fr4L, %fr25
	ldd	-0x70(%r30), p032a2
	fstd	%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	ldd	-0x80(%r30), p000a
	fstd	%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	ldd	-0x68(%r30), p064a
	addib,<> -1, %r5, L(three_or_more)
	fstd	%fr25, -0x68(%r30)	C high product to -0x68..-0x61
C  Exactly two leading limbs: combine the mid products of the first limb and
C  continue at L(0_two_out).  The depd after the branch is in the delay slot.
LDEF(two)
	add	p032a1, p032a2, m032
	add,dc	%r0, %r0, m096
	depd,z	m032, 31, 32, ma000
	extrd,u	m032, 31, 32, ma064
	ldd	0(rp), r000
	b	L(0_two_out)
	depd	m096, 31, 32, ma064

C  Three leading limbs: load the third limb and combine the mid products of
C  the first limb, then fall through L(loop0) (disabled) into L(0_out).
LDEF(three_or_more)
	fldd	0(up), %fr4
	add	p032a1, p032a2, m032
	add,dc	%r0, %r0, m096
	depd,z	m032, 31, 32, ma000
	extrd,u	m032, 31, 32, ma064
	ldd	0(rp), r000
C	addib,=	-1, %r5, L(0_out)
	depd	m096, 31, 32, ma064
C  The loop body below is commented out, so control falls straight through
C  to L(0_out).
LDEF(loop0)
C	xmpyu	%fr8R, %fr4L, %fr22
C	xmpyu	%fr8L, %fr4R, %fr23
C	ldd	-0x78(%r30), p032a1
C	fstd	%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
C
C	xmpyu	%fr8R, %fr4R, %fr24
C	xmpyu	%fr8L, %fr4L, %fr25
C	ldd	-0x70(%r30), p032a2
C	fstd	%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
C
C	ldo	8(rp), rp
C	add	climb, p000a, s000
C	ldd	-0x80(%r30), p000a
C	fstd	%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
C
C	add,dc	p064a, %r0, climb
C	ldo	8(up), up
C	ldd	-0x68(%r30), p064a
C	fstd	%fr25, -0x68(%r30)	C high product to -0x68..-0x61
C
C	add	ma000, s000, s000
C	add,dc	ma064, climb, climb
C	fldd	0(up), %fr4
C
C	add	r000, s000, s000
C	add,dc	%r0, climb, climb
C	std	s000, -8(rp)
C
C	add	p032a1, p032a2, m032
C	add,dc	%r0, %r0, m096
C
C	depd,z	m032, 31, 32, ma000
C	extrd,u	m032, 31, 32, ma064
C	ldd	0(rp), r000
C	addib,<> -1, %r5, L(loop0)
C	depd	m096, 31, 32, ma064
C  Wind-down with three limbs in flight: multiply the third limb, finish and
C  store the first limb, and combine the mid products of the second limb.
LDEF(0_out)
	ldo	8(up), up
	xmpyu	%fr8R, %fr4L, %fr22
	xmpyu	%fr8L, %fr4R, %fr23
	ldd	-0x78(%r30), p032a1
	fstd	%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu	%fr8R, %fr4R, %fr24
	xmpyu	%fr8L, %fr4L, %fr25
	ldd	-0x70(%r30), p032a2
	fstd	%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	ldo	8(rp), rp
	add	climb, p000a, s000
	ldd	-0x80(%r30), p000a
	fstd	%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	add,dc	p064a, %r0, climb
	ldd	-0x68(%r30), p064a
	fstd	%fr25, -0x68(%r30)	C high product to -0x68..-0x61
	add	ma000, s000, s000
	add,dc	ma064, climb, climb
	add	r000, s000, s000
	add,dc	%r0, climb, climb
	std	s000, -8(rp)
	add	p032a1, p032a2, m032
	add,dc	%r0, %r0, m096
	depd,z	m032, 31, 32, ma000
	extrd,u	m032, 31, 32, ma064
	ldd	0(rp), r000
	depd	m096, 31, 32, ma064
C  Wind-down with two limbs in flight: reload the partial products of the
C  last limb while finishing and storing the limb before it.
LDEF(0_two_out)
	ldd	-0x78(%r30), p032a1
	ldd	-0x70(%r30), p032a2
	ldo	8(rp), rp
	add	climb, p000a, s000
	ldd	-0x80(%r30), p000a
	add,dc	p064a, %r0, climb
	ldd	-0x68(%r30), p064a
	add	ma000, s000, s000
	add,dc	ma064, climb, climb
	add	r000, s000, s000
	add,dc	%r0, climb, climb
	std	s000, -8(rp)
C  Wind-down for the last leading limb: combine its mid products, then add
C  low product, aligned mid sum and the rp limb into s000, carrying to climb.
LDEF(0_one_out)
	add	p032a1, p032a2, m032
	add,dc	%r0, %r0, m096
	depd,z	m032, 31, 32, ma000
	extrd,u	m032, 31, 32, ma064
	ldd	0(rp), r000
	depd	m096, 31, 32, ma064

	add	climb, p000a, s000
	add,dc	p064a, %r0, climb
	add	ma000, s000, s000
	add,dc	ma064, climb, climb
	add	r000, s000, s000
	add,dc	%r0, climb, climb
	std	s000, 0(rp)

C  Branch to L(done) when 4 >= n.  n mod 4 is nonzero on this path, so that
C  means no group of 4 limbs remains.  The rp update is in the delay slot.
	cmpib,>= 4, n, L(done)
	ldo	8(rp), rp

C 4-way unrolled code.

LDEF(BIG)

C  Register roles for the 4-way unrolled code.  Suffixes a..d name the four
C  limbs of a group; several names share one register at different times.
define(`p032a1',`%r1')	C mid partial products, limb a
define(`p032a2',`%r19')	C
define(`p096b1',`%r20')	C mid partial products, limb b
define(`p096b2',`%r21')	C
define(`p160c1',`%r22')	C mid partial products, limb c
define(`p160c2',`%r29')	C
define(`p224d1',`%r31')	C mid partial products, limb d
define(`p224d2',`%r3')	C
C
define(`m032',`%r4')	C mid sums, chained with add,dc
define(`m096',`%r5')	C
define(`m160',`%r6')	C
define(`m224',`%r7')	C
define(`m288',`%r8')	C carry out of the mid sum chain
C
define(`p000a',`%r1')	C low product, limb a
define(`p064a',`%r19')	C high product, limb a
define(`p064b',`%r20')	C low product, limb b
define(`p128b',`%r21')	C high product, limb b
define(`p128c',`%r22')	C low product, limb c
define(`p192c',`%r29')	C high product, limb c
define(`p192d',`%r31')	C low product, limb d
define(`p256d',`%r3')	C high product, limb d
C
define(`s000',`%r10')	C the four result limbs being accumulated
define(`s064',`%r11')	C
define(`s128',`%r12')	C
define(`s192',`%r13')	C
C
define(`ma000',`%r9')	C mid sums realigned by 32 bits (depd/extrd pairs)
define(`ma064',`%r4')	C
define(`ma128',`%r5')	C
define(`ma192',`%r6')	C
define(`ma256',`%r7')	C
C
define(`r000',`%r1')	C limbs loaded from rp
define(`r064',`%r19')	C
define(`r128',`%r20')	C
define(`r192',`%r21')	C

C  Save the additional registers used by the unrolled code (see STACK LAYOUT).
	std	%r6, -0xe8(%r30)
	std	%r7, -0xe0(%r30)
	std	%r8, -0xd8(%r30)
	std	%r9, -0xd0(%r30)
	std	%r10, -0xc8(%r30)
	std	%r11, -0xc0(%r30)
	std	%r12, -0xb8(%r30)
	std	%r13, -0xb0(%r30)

C  n becomes the number of 4-limb groups.
ifdef(`HAVE_ABI_2_0w',
`	extrd,u	n, 61, 62, n		C right shift 2
',`	extrd,u	n, 61, 30, n		C right shift 2, zero extend
')

C  Feed-in: load the first group of four limbs and start its 16 partial
C  products, spilling the mid products as they become available.
LDEF(4_or_more)
	fldd	0(up), %fr4
	fldd	8(up), %fr5
	fldd	16(up), %fr6
	fldd	24(up), %fr7
	xmpyu	%fr8R, %fr4L, %fr22
	xmpyu	%fr8L, %fr4R, %fr23
	xmpyu	%fr8R, %fr5L, %fr24
	xmpyu	%fr8L, %fr5R, %fr25
	xmpyu	%fr8R, %fr6L, %fr26
	xmpyu	%fr8L, %fr6R, %fr27
	fstd	%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu	%fr8R, %fr7L, %fr28
	xmpyu	%fr8L, %fr7R, %fr29
	fstd	%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	xmpyu	%fr8R, %fr4R, %fr30
	xmpyu	%fr8L, %fr4L, %fr31
	fstd	%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31
	xmpyu	%fr8R, %fr5R, %fr22
	xmpyu	%fr8L, %fr5L, %fr23
	fstd	%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29
	xmpyu	%fr8R, %fr6R, %fr24
	xmpyu	%fr8L, %fr6L, %fr25
	fstd	%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51
	xmpyu	%fr8R, %fr7R, %fr26
	fstd	%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
C  Decrement the group count; branch if more groups remain.  The last xmpyu
C  of the group is in the delay slot.
	addib,<> -1, n, L(8_or_more)
	xmpyu	%fr8L, %fr7L, %fr27
C  Only one group: spill the remaining products, reload the mid products
C  into integer registers and finish at L(end1).
	fstd	%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	fstd	%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	fstd	%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	fstd	%fr31, -0x68(%r30)	C high product to -0x68..-0x61
	fstd	%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	fstd	%fr23, -0x28(%r30)	C high product to -0x28..-0x21
	fstd	%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	fstd	%fr25, -0x48(%r30)	C high product to -0x48..-0x41
	fstd	%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	fstd	%fr27, -0x88(%r30)	C high product to -0x88..-0x81
	ldd	-0x78(%r30), p032a1
	ldd	-0x70(%r30), p032a2
	ldd	-0x38(%r30), p096b1
	ldd	-0x30(%r30), p096b2
	ldd	-0x58(%r30), p160c1
	ldd	-0x50(%r30), p160c2
	ldd	-0x18(%r30), p224d1
	ldd	-0x10(%r30), p224d2
	b	L(end1)
	nop

C  Two or more groups: spill the rest of the first group, then load and start
C  multiplying the second group while the mid products of the first group
C  are reloaded into integer registers.
LDEF(8_or_more)
	fstd	%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	fstd	%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	ldo	32(up), up
	fstd	%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	fstd	%fr31, -0x68(%r30)	C high product to -0x68..-0x61
	fstd	%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	fstd	%fr23, -0x28(%r30)	C high product to -0x28..-0x21
	fstd	%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	fstd	%fr25, -0x48(%r30)	C high product to -0x48..-0x41
	fstd	%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	fstd	%fr27, -0x88(%r30)	C high product to -0x88..-0x81
	fldd	0(up), %fr4
	fldd	8(up), %fr5
	fldd	16(up), %fr6
	fldd	24(up), %fr7
	xmpyu	%fr8R, %fr4L, %fr22
	ldd	-0x78(%r30), p032a1
	xmpyu	%fr8L, %fr4R, %fr23
	xmpyu	%fr8R, %fr5L, %fr24
	ldd	-0x70(%r30), p032a2
	xmpyu	%fr8L, %fr5R, %fr25
	xmpyu	%fr8R, %fr6L, %fr26
	ldd	-0x38(%r30), p096b1
	xmpyu	%fr8L, %fr6R, %fr27
	fstd	%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu	%fr8R, %fr7L, %fr28
	ldd	-0x30(%r30), p096b2
	xmpyu	%fr8L, %fr7R, %fr29
	fstd	%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	xmpyu	%fr8R, %fr4R, %fr30
	ldd	-0x58(%r30), p160c1
	xmpyu	%fr8L, %fr4L, %fr31
	fstd	%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31
	xmpyu	%fr8R, %fr5R, %fr22
	ldd	-0x50(%r30), p160c2
	xmpyu	%fr8L, %fr5L, %fr23
	fstd	%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29
	xmpyu	%fr8R, %fr6R, %fr24
	ldd	-0x18(%r30), p224d1
	xmpyu	%fr8L, %fr6L, %fr25
	fstd	%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51
	xmpyu	%fr8R, %fr7R, %fr26
	ldd	-0x10(%r30), p224d2
	fstd	%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
C  Decrement the group count; go to the wind-down when no further group is
C  left to load.  The last xmpyu of the group is in the delay slot.
	addib,=	-1, n, L(end2)
	xmpyu	%fr8L, %fr7L, %fr27
C  Main loop.  Per iteration: finish and store four result limbs of the
C  oldest group, spill the products of the group already multiplied, and
C  load and multiply the next group of four limbs.  Each ldd of a low/high
C  product comes before the fstd that overwrites the same slot.
LDEF(loop)
	add	p032a1, p032a2, m032
	ldd	-0x80(%r30), p000a
	add,dc	p096b1, p096b2, m096
	fstd	%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11

	add,dc	p160c1, p160c2, m160
	ldd	-0x68(%r30), p064a
	add,dc	p224d1, p224d2, m224
	fstd	%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09

	add,dc	%r0, %r0, m288
	ldd	-0x40(%r30), p064b
	ldo	32(up), up
	fstd	%fr30, -0x80(%r30)	C low product to  -0x80..-0x79

	depd,z	m032, 31, 32, ma000
	ldd	-0x28(%r30), p128b
	extrd,u	m032, 31, 32, ma064
	fstd	%fr31, -0x68(%r30)	C high product to -0x68..-0x61

	depd	m096, 31, 32, ma064
	ldd	-0x60(%r30), p128c
	extrd,u	m096, 31, 32, ma128
	fstd	%fr22, -0x40(%r30)	C low product to  -0x40..-0x39

	depd	m160, 31, 32, ma128
	ldd	-0x48(%r30), p192c
	extrd,u	m160, 31, 32, ma192
	fstd	%fr23, -0x28(%r30)	C high product to -0x28..-0x21

	depd	m224, 31, 32, ma192
	ldd	-0x20(%r30), p192d
	extrd,u	m224, 31, 32, ma256
	fstd	%fr24, -0x60(%r30)	C low product to  -0x60..-0x59

	depd	m288, 31, 32, ma256
	ldd	-0x88(%r30), p256d
	add	climb, p000a, s000
	fstd	%fr25, -0x48(%r30)	C high product to -0x48..-0x41

	add,dc	p064a, p064b, s064
	ldd	0(rp), r000
	add,dc	p128b, p128c, s128
	fstd	%fr26, -0x20(%r30)	C low product to  -0x20..-0x19

	add,dc	p192c, p192d, s192
	ldd	8(rp), r064
	add,dc	p256d, %r0, climb
	fstd	%fr27, -0x88(%r30)	C high product to -0x88..-0x81

	ldd	16(rp), r128
	add	ma000, s000, s000	C accum mid 0
	ldd	24(rp), r192
	add,dc	ma064, s064, s064	C accum mid 1

	add,dc	ma128, s128, s128	C accum mid 2
	fldd	0(up), %fr4
	add,dc	ma192, s192, s192	C accum mid 3
	fldd	8(up), %fr5

	add,dc	ma256, climb, climb
	fldd	16(up), %fr6
	add	r000, s000, s000	C accum rlimb 0
	fldd	24(up), %fr7

	add,dc	r064, s064, s064	C accum rlimb 1
	add,dc	r128, s128, s128	C accum rlimb 2
	std	s000, 0(rp)

	add,dc	r192, s192, s192	C accum rlimb 3
	add,dc	%r0, climb, climb
	std	s064, 8(rp)

	xmpyu	%fr8R, %fr4L, %fr22
	ldd	-0x78(%r30), p032a1
	xmpyu	%fr8L, %fr4R, %fr23
	std	s128, 16(rp)

	xmpyu	%fr8R, %fr5L, %fr24
	ldd	-0x70(%r30), p032a2
	xmpyu	%fr8L, %fr5R, %fr25
	std	s192, 24(rp)

	xmpyu	%fr8R, %fr6L, %fr26
	ldd	-0x38(%r30), p096b1
	xmpyu	%fr8L, %fr6R, %fr27
	fstd	%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71

	xmpyu	%fr8R, %fr7L, %fr28
	ldd	-0x30(%r30), p096b2
	xmpyu	%fr8L, %fr7R, %fr29
	fstd	%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69

	xmpyu	%fr8R, %fr4R, %fr30
	ldd	-0x58(%r30), p160c1
	xmpyu	%fr8L, %fr4L, %fr31
	fstd	%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31

	xmpyu	%fr8R, %fr5R, %fr22
	ldd	-0x50(%r30), p160c2
	xmpyu	%fr8L, %fr5L, %fr23
	fstd	%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29

	xmpyu	%fr8R, %fr6R, %fr24
	ldd	-0x18(%r30), p224d1
	xmpyu	%fr8L, %fr6L, %fr25
	fstd	%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51

	xmpyu	%fr8R, %fr7R, %fr26
	ldd	-0x10(%r30), p224d2
	fstd	%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
	xmpyu	%fr8L, %fr7L, %fr27

C  Loop while groups remain; the rp advance is in the delay slot.
	addib,<> -1, n, L(loop)
	ldo	32(rp), rp

C  Wind-down, two groups in flight: same work as one loop iteration but with
C  no further loads from up and no multiplies.  Finishes the older group and
C  reloads the mid products of the last group for L(end1).
LDEF(end2)
	add	p032a1, p032a2, m032
	ldd	-0x80(%r30), p000a
	add,dc	p096b1, p096b2, m096
	fstd	%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	add,dc	p160c1, p160c2, m160
	ldd	-0x68(%r30), p064a
	add,dc	p224d1, p224d2, m224
	fstd	%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	add,dc	%r0, %r0, m288
	ldd	-0x40(%r30), p064b
	fstd	%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	depd,z	m032, 31, 32, ma000
	ldd	-0x28(%r30), p128b
	extrd,u	m032, 31, 32, ma064
	fstd	%fr31, -0x68(%r30)	C high product to -0x68..-0x61
	depd	m096, 31, 32, ma064
	ldd	-0x60(%r30), p128c
	extrd,u	m096, 31, 32, ma128
	fstd	%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	depd	m160, 31, 32, ma128
	ldd	-0x48(%r30), p192c
	extrd,u	m160, 31, 32, ma192
	fstd	%fr23, -0x28(%r30)	C high product to -0x28..-0x21
	depd	m224, 31, 32, ma192
	ldd	-0x20(%r30), p192d
	extrd,u	m224, 31, 32, ma256
	fstd	%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	depd	m288, 31, 32, ma256
	ldd	-0x88(%r30), p256d
	add	climb, p000a, s000
	fstd	%fr25, -0x48(%r30)	C high product to -0x48..-0x41
	add,dc	p064a, p064b, s064
	ldd	0(rp), r000
	add,dc	p128b, p128c, s128
	fstd	%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	add,dc	p192c, p192d, s192
	ldd	8(rp), r064
	add,dc	p256d, %r0, climb
	fstd	%fr27, -0x88(%r30)	C high product to -0x88..-0x81
	ldd	16(rp), r128
	add	ma000, s000, s000	C accum mid 0
	ldd	24(rp), r192
	add,dc	ma064, s064, s064	C accum mid 1
	add,dc	ma128, s128, s128	C accum mid 2
	add,dc	ma192, s192, s192	C accum mid 3
	add,dc	ma256, climb, climb
	add	r000, s000, s000	C accum rlimb 0
	add,dc	r064, s064, s064	C accum rlimb 1
	add,dc	r128, s128, s128	C accum rlimb 2
	std	s000, 0(rp)
	add,dc	r192, s192, s192	C accum rlimb 3
	add,dc	%r0, climb, climb
	std	s064, 8(rp)
	ldd	-0x78(%r30), p032a1
	std	s128, 16(rp)
	ldd	-0x70(%r30), p032a2
	std	s192, 24(rp)
	ldd	-0x38(%r30), p096b1
	ldd	-0x30(%r30), p096b2
	ldd	-0x58(%r30), p160c1
	ldd	-0x50(%r30), p160c2
	ldd	-0x18(%r30), p224d1
	ldd	-0x10(%r30), p224d2
	ldo	32(rp), rp

C  Wind-down, last group: all of its products are already on the stack, so
C  this is pure integer work ending with the final four stores to rp.
LDEF(end1)
	add	p032a1, p032a2, m032
	ldd	-0x80(%r30), p000a
	add,dc	p096b1, p096b2, m096
	add,dc	p160c1, p160c2, m160
	ldd	-0x68(%r30), p064a
	add,dc	p224d1, p224d2, m224
	add,dc	%r0, %r0, m288
	ldd	-0x40(%r30), p064b
	depd,z	m032, 31, 32, ma000
	ldd	-0x28(%r30), p128b
	extrd,u	m032, 31, 32, ma064
	depd	m096, 31, 32, ma064
	ldd	-0x60(%r30), p128c
	extrd,u	m096, 31, 32, ma128
	depd	m160, 31, 32, ma128
	ldd	-0x48(%r30), p192c
	extrd,u	m160, 31, 32, ma192
	depd	m224, 31, 32, ma192
	ldd	-0x20(%r30), p192d
	extrd,u	m224, 31, 32, ma256
	depd	m288, 31, 32, ma256
	ldd	-0x88(%r30), p256d
	add	climb, p000a, s000
	add,dc	p064a, p064b, s064
	ldd	0(rp), r000
	add,dc	p128b, p128c, s128
	add,dc	p192c, p192d, s192
	ldd	8(rp), r064
	add,dc	p256d, %r0, climb
	ldd	16(rp), r128
	add	ma000, s000, s000	C accum mid 0
	ldd	24(rp), r192
	add,dc	ma064, s064, s064	C accum mid 1
	add,dc	ma128, s128, s128	C accum mid 2
	add,dc	ma192, s192, s192	C accum mid 3
	add,dc	ma256, climb, climb
	add	r000, s000, s000	C accum rlimb 0
	add,dc	r064, s064, s064	C accum rlimb 1
	add,dc	r128, s128, s128	C accum rlimb 2
	std	s000, 0(rp)
	add,dc	r192, s192, s192	C accum rlimb 3
	add,dc	%r0, climb, climb
	std	s064, 8(rp)
	std	s128, 16(rp)
	std	s192, 24(rp)

C  Restore the registers saved at L(BIG).
	ldd	-0xb0(%r30), %r13
	ldd	-0xb8(%r30), %r12
	ldd	-0xc0(%r30), %r11
	ldd	-0xc8(%r30), %r10
	ldd	-0xd0(%r30), %r9
	ldd	-0xd8(%r30), %r8
	ldd	-0xe0(%r30), %r7
	ldd	-0xe8(%r30), %r6
C  Common exit.  The carry limb goes to %r28 under HAVE_ABI_2_0w; otherwise
C  its upper 32 bits go to %r28 and its lower 32 bits to %r29.  Then restore
C  %r5 and %r4, and restore %r3 while moving %r30 back by 0x100 in the delay
C  slot of the return branch.
LDEF(done)
ifdef(`HAVE_ABI_2_0w',
`	copy	climb, %r28
',`	extrd,u	climb, 63, 32, %r29
	extrd,u	climb, 31, 32, %r28
')
	ldd	-0xf0(%r30), %r5
	ldd	-0xf8(%r30), %r4
	bve	(%r2)
	ldd,mb	-0x100(%r30), %r3
EPILOGUE(mpn_addmul_1)