dnl  IA-64 mpn_mul_2 -- Multiply an n-limb number by a 2-limb number, store the
dnl  low n+1 limbs of the product and leave the most significant limb in r8.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2004, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C         cycles/limb
C Itanium:    ?
C Itanium 2:  1.5

C TODO
C  * Clean up variable names, and try to decrease the number of distinct
C    registers used.
C  * Clean up feed-in code to not require zeroing several registers.
C  * Make sure we don't depend on uninitialized predicate registers.
C  * Could perhaps save a few cycles by using 1 c/l carry propagation in
C    wind-down code.
C  * Ultimately rewrite.  The problem with this code is that it first uses a
C    loaded u value in one xma pair, then leaves it live over several unrelated
C    xma pairs, before it uses it again.  It should actually be quite possible
C    to just swap some aligned xma pairs around.  But we should then schedule
C    u loads further from the first use.

C OVERVIEW
C  * Inputs are rp (r32), up (r33), n (r34) and vp (r35).  Two limbs v0 and v1
C    are loaded from vp once and stay in f6 and f7.
C  * Every u limb is multiplied by v0 and by v1 with an xma.l/xma.hu pair
C    (low and high half of a*b+c).  The v1 products take the high half of the
C    matching v0 product as their addend.
C  * Product halves are moved to general registers with getfsig and summed
C    there.  acc0 and s0 hold the two running sums; s0 is what gets stored.
C  * Carries live in predicate pairs (p6,p7) (p8,p9) (p10,p11) (p12,p13).  The
C    first predicate of a pair selects the add with an extra 1, the second the
C    plain add.
C  * n+1 limbs are stored through rp.  The top limb, including the final
C    carry, is left in r8 before br.ret.
C  * ar.lc is copied to r2 on entry and restored before each br.ret.
C  * NOTE(review): the entry code loads two u limbs unconditionally and uses
C    (n-2)>>2 as loop count, so n >= 2 appears to be required -- confirm
C    against the caller contract.

C INPUT PARAMETERS
define(`rp',`r32')
define(`up',`r33')
define(`n',`r34')
define(`vp',`r35')

C NOTE(review): srp is defined here but not referenced anywhere in this file.
define(`srp',`r3')

C The two multiplier limbs.
define(`v0',`f6')
define(`v1',`f7')

C Running sums.  s0 shares r14 with the n mod 4 value computed at entry.
define(`s0',`r14')
define(`acc0',`r15')

C General register copies of the product halves, four rotating slots each.
C pr0_k receives fp0b_k, pr1_k receives fp1b_k, acc1_k receives fp2a_k.
define(`pr0_0',`r16') define(`pr0_1',`r17')
define(`pr0_2',`r18') define(`pr0_3',`r19')

define(`pr1_0',`r20') define(`pr1_1',`r21')
define(`pr1_2',`r22') define(`pr1_3',`r23')

define(`acc1_0',`r24') define(`acc1_1',`r25')
define(`acc1_2',`r26') define(`acc1_3',`r27')

dnl define(`',`r28')
dnl define(`',`r29')
dnl define(`',`r30')
dnl define(`',`r31')

C Floating point product registers, four rotating slots each.
C fp0b = low(u*v0), fp1a = high(u*v0), fp1b = low(u*v1 + fp1a),
C fp2a = high(u*v1 + fp1a).
define(`fp0b_0',`f8') define(`fp0b_1',`f9')
define(`fp0b_2',`f10') define(`fp0b_3',`f11')

define(`fp1a_0',`f12') define(`fp1a_1',`f13')
define(`fp1a_2',`f14') define(`fp1a_3',`f15')

define(`fp1b_0',`f32') define(`fp1b_1',`f33')
define(`fp1b_2',`f34') define(`fp1b_3',`f35')

define(`fp2a_0',`f36') define(`fp2a_1',`f37')
define(`fp2a_2',`f38') define(`fp2a_3',`f39')

C Loaded u limbs for the loop, four rotating slots.
define(`u_0',`f44') define(`u_1',`f45')
define(`u_2',`f46') define(`u_3',`f47')

C The first two u limbs, loaded by the entry code.
define(`ux',`f49')
define(`uy',`f51')

ASM_START()
PROLOGUE(mpn_mul_2)
	.prologue
	.save	ar.lc, r2
	.body

C With HAVE_ABI_32 the three pointer arguments are converted with addp4 and
C n is zero extended from 32 bits.
ifdef(`HAVE_ABI_32',`
{.mmi;		addp4	rp = 0, rp		C M I
		addp4	up = 0, up		C M I
		addp4	vp = 0, vp		C M I
}{.mmi;		nop	1
		nop	1
		zxt4	n = n			C I
	;;
}')

C Entry: load ux, uy, v0 and v1, save ar.lc in r2, set r14 = n mod 4 and
C ar.lc = (n-2)>>2, then pick the feed-in path from n mod 4.  The fall-through
C case b00 is n mod 4 = 0.
{.mmi;		ldf8	ux = [up], 8		C M
		ldf8	v0 = [vp], 8		C M
		mov	r2 = ar.lc		C I0
}{.mmi;		nop	1			C M
		and	r14 = 3, n		C M I
		add	n = -2, n		C M I
	;;
}{.mmi;		ldf8	uy = [up], 8		C M
		ldf8	v1 = [vp]		C M
		shr.u	n = n, 2		C I0
}{.mmi;		nop	1			C M
		cmp.eq	p10, p0 = 1, r14	C M I
		cmp.eq	p11, p0 = 2, r14	C M I
	;;
}{.mmi;		nop	1			C M
		cmp.eq	p12, p0 = 3, r14	C M I
		mov	ar.lc = n		C I0
}{.bbb;	(p10)	br.dptk	L(b01)			C B
	(p11)	br.dptk	L(b10)			C B
	(p12)	br.dptk	L(b11)			C B
	;;
}
C Feed-in for n mod 4 = 0.  The zeroed registers are the sum inputs that have
C no real value yet on the first pass.  Each cmp.ne on r0, r0 clears the first
C predicate and sets the second, i.e. starts that carry chain with no carry.
C br.cloop falls through when the loop count is zero (n = 4) and that path
C joins the wind-down at cj4.  Otherwise gt4 enters the main loop at 00.
	ALIGN(32)
L(b00):	ldf8	u_1 = [up], 8
	mov	acc1_2 = 0
	mov	pr1_2 = 0
	mov	pr0_3 = 0
	cmp.ne	p8, p9 = r0, r0
	;;
	xma.l	fp0b_3 = ux, v0, f0
	cmp.ne	p12, p13 = r0, r0
	ldf8	u_2 = [up], 8
	xma.hu	fp1a_3 = ux, v0, f0
	br.cloop.dptk	L(gt4)

	xma.l	fp0b_0 = uy, v0, f0
	xma.hu	fp1a_0 = uy, v0, f0
	;;
	getfsig	acc0 = fp0b_3
	xma.l	fp1b_3 = ux, v1, fp1a_3
	xma.hu	fp2a_3 = ux, v1, fp1a_3
	;;
	xma.l	fp0b_1 = u_1, v0, f0
	xma.hu	fp1a_1 = u_1, v0, f0
	;;
	getfsig	pr0_0 = fp0b_0
	xma.l	fp1b_0 = uy, v1, fp1a_0
	xma.hu	fp2a_0 = uy, v1, fp1a_0
	;;
	getfsig	pr1_3 = fp1b_3
	getfsig	acc1_3 = fp2a_3
	xma.l	fp0b_2 = u_2, v0, f0
	xma.hu	fp1a_2 = u_2, v0, f0
	br	L(cj4)

L(gt4):	xma.l	fp0b_0 = uy, v0, f0
	xma.hu	fp1a_0 = uy, v0, f0
	;;
	getfsig	acc0 = fp0b_3
	xma.l	fp1b_3 = ux, v1, fp1a_3
	ldf8	u_3 = [up], 8
	xma.hu	fp2a_3 = ux, v1, fp1a_3
	;;
	xma.l	fp0b_1 = u_1, v0, f0
	xma.hu	fp1a_1 = u_1, v0, f0
	;;
	getfsig	pr0_0 = fp0b_0
	xma.l	fp1b_0 = uy, v1, fp1a_0
	xma.hu	fp2a_0 = uy, v1, fp1a_0
	;;
	ldf8	u_0 = [up], 8
	getfsig	pr1_3 = fp1b_3
	xma.l	fp0b_2 = u_2, v0, f0
	;;
	getfsig	acc1_3 = fp2a_3
	xma.hu	fp1a_2 = u_2, v0, f0
	br	L(00)


C Feed-in for n mod 4 = 1.  Zero loop count (n = 5) falls through and joins
C the wind-down at cj5.  Otherwise gt5 enters the main loop at 01.
	ALIGN(32)
L(b01):	ldf8	u_0 = [up], 8		C M
	mov	acc1_1 = 0		C M I
	mov	pr1_1 = 0		C M I
	mov	pr0_2 = 0		C M I
	cmp.ne	p6, p7 = r0, r0		C M I
	;;
	xma.l	fp0b_2 = ux, v0, f0	C F
	cmp.ne	p10, p11 = r0, r0	C M I
	ldf8	u_1 = [up], 8		C M
	xma.hu	fp1a_2 = ux, v0, f0	C F
	;;
	xma.l	fp0b_3 = uy, v0, f0	C F
	xma.hu	fp1a_3 = uy, v0, f0	C F
	;;
	getfsig	acc0 = fp0b_2		C M
	xma.l	fp1b_2 = ux, v1,fp1a_2	C F
	ldf8	u_2 = [up], 8		C M
	xma.hu	fp2a_2 = ux, v1,fp1a_2	C F
	br.cloop.dptk	L(gt5)

	xma.l	fp0b_0 = u_0, v0, f0	C F
	xma.hu	fp1a_0 = u_0, v0, f0	C F
	;;
	getfsig	pr0_3 = fp0b_3		C M
	xma.l	fp1b_3 = uy, v1,fp1a_3	C F
	xma.hu	fp2a_3 = uy, v1,fp1a_3	C F
	;;
	getfsig	pr1_2 = fp1b_2		C M
	getfsig	acc1_2 = fp2a_2		C M
	xma.l	fp0b_1 = u_1, v0, f0	C F
	xma.hu	fp1a_1 = u_1, v0, f0	C F
	br	L(cj5)

L(gt5):	xma.l	fp0b_0 = u_0, v0, f0
	xma.hu	fp1a_0 = u_0, v0, f0
	;;
	getfsig	pr0_3 = fp0b_3
	xma.l	fp1b_3 = uy, v1, fp1a_3
	xma.hu	fp2a_3 = uy, v1, fp1a_3
	;;
	ldf8	u_3 = [up], 8
	getfsig	pr1_2 = fp1b_2
	xma.l	fp0b_1 = u_1, v0, f0
	;;
	getfsig	acc1_2 = fp2a_2
	xma.hu	fp1a_1 = u_1, v0, f0
	br	L(01)


C Feed-in for n mod 4 = 2.  Zero loop count means n = 2, which is finished
C right here without touching the shared wind-down: three limbs are stored
C and the top limb goes to r8.
	ALIGN(32)
L(b10):	br.cloop.dptk	L(gt2)
	xma.l	fp0b_1 = ux, v0, f0
	xma.hu	fp1a_1 = ux, v0, f0
	;;
	xma.l	fp0b_2 = uy, v0, f0
	xma.hu	fp1a_2 = uy, v0, f0
	;;
C The lowest product limb needs no addition and is stored straight from the
C floating point register.
	stf8	[rp] = fp0b_1, 8
	xma.l	fp1b_1 = ux, v1, fp1a_1
	xma.hu	fp2a_1 = ux, v1, fp1a_1
	;;
	getfsig	acc0 = fp0b_2
	xma.l	fp1b_2 = uy, v1, fp1a_2
	xma.hu	fp2a_2 = uy, v1, fp1a_2
	;;
	getfsig	pr1_1 = fp1b_1
	getfsig	acc1_1 = fp2a_1
	mov	ar.lc = r2
	getfsig	pr1_2 = fp1b_2
	getfsig	r8 = fp2a_2
	;;
	add	s0 = pr1_1, acc0
	;;
	st8	[rp] = s0, 8
	cmp.ltu	p8, p9 = s0, pr1_1
C r31 = -1 - acc1_1, the bitwise complement of acc1_1.  Comparing it with
C pr1_2 below predicts the carry out of pr1_2 + acc1_1 (+ 1) without waiting
C for the sum itself.
	sub	r31 = -1, acc1_1
	;;
	.pred.rel "mutex", p8, p9
  (p8)	add	acc0 = pr1_2, acc1_1, 1
  (p9)	add	acc0 = pr1_2, acc1_1
  (p8)	cmp.leu	p10, p0 = r31, pr1_2
  (p9)	cmp.ltu	p10, p0 = r31, pr1_2
	;;
	st8	[rp] = acc0, 8
  (p10)	add	r8 = 1, r8
	br.ret.sptk.many b0

C n mod 4 = 2 with a nonzero loop count.  Enters the main loop at 10.
L(gt2):	ldf8	u_3 = [up], 8
	mov	acc1_0 = 0
	mov	pr1_0 = 0
	;;
	mov	pr0_1 = 0
	xma.l	fp0b_1 = ux, v0, f0
	ldf8	u_0 = [up], 8
	xma.hu	fp1a_1 = ux, v0, f0
	;;
	xma.l	fp0b_2 = uy, v0, f0
	xma.hu	fp1a_2 = uy, v0, f0
	;;
	getfsig	acc0 = fp0b_1
	xma.l	fp1b_1 = ux, v1, fp1a_1
	xma.hu	fp2a_1 = ux, v1, fp1a_1
	;;
	ldf8	u_1 = [up], 8
	xma.l	fp0b_3 = u_3, v0, f0
	xma.hu	fp1a_3 = u_3, v0, f0
	;;
	getfsig	pr0_2 = fp0b_2
	xma.l	fp1b_2 = uy, v1, fp1a_2
	xma.hu	fp2a_2 = uy, v1, fp1a_2
	;;
	ldf8	u_2 = [up], 8
	getfsig	pr1_1 = fp1b_1
	;;
{.mfi;		getfsig	acc1_1 = fp2a_1
		xma.l	fp0b_0 = u_0, v0, f0
		cmp.ne	p8, p9 = r0, r0
}{.mfb;		cmp.ne	p12, p13 = r0, r0
		xma.hu	fp1a_0 = u_0, v0, f0
		br	L(10)
}

C Feed-in for n mod 4 = 3.  Zero loop count (n = 3) falls through and joins
C the wind-down at cj3.  Otherwise gt3 enters the main loop at 11.
	ALIGN(32)
L(b11):	mov	acc1_3 = 0
	mov	pr1_3 = 0
	mov	pr0_0 = 0
	ldf8	u_2 = [up], 8
	cmp.ne	p6, p7 = r0, r0
	br.cloop.dptk	L(gt3)
	;;
	xma.l	fp0b_0 = ux, v0, f0
	xma.hu	fp1a_0 = ux, v0, f0
	;;
	cmp.ne	p10, p11 = r0, r0
	xma.l	fp0b_1 = uy, v0, f0
	xma.hu	fp1a_1 = uy, v0, f0
	;;
	getfsig	acc0 = fp0b_0
	xma.l	fp1b_0 = ux, v1, fp1a_0
	xma.hu	fp2a_0 = ux, v1, fp1a_0
	;;
	xma.l	fp0b_2 = u_2, v0, f0
	xma.hu	fp1a_2 = u_2, v0, f0
	;;
	getfsig	pr0_1 = fp0b_1
	xma.l	fp1b_1 = uy, v1, fp1a_1
	xma.hu	fp2a_1 = uy, v1, fp1a_1
	;;
	getfsig	pr1_0 = fp1b_0
	getfsig	acc1_0 = fp2a_0
	br	L(cj3)

L(gt3):	xma.l	fp0b_0 = ux, v0, f0
	cmp.ne	p10, p11 = r0, r0
	ldf8	u_3 = [up], 8
	xma.hu	fp1a_0 = ux, v0, f0
	;;
	xma.l	fp0b_1 = uy, v0, f0
	xma.hu	fp1a_1 = uy, v0, f0
	;;
	getfsig	acc0 = fp0b_0
	xma.l	fp1b_0 = ux, v1, fp1a_0
	ldf8	u_0 = [up], 8
	xma.hu	fp2a_0 = ux, v1, fp1a_0
	;;
	xma.l	fp0b_2 = u_2, v0, f0
	xma.hu	fp1a_2 = u_2, v0, f0
	;;
	getfsig	pr0_1 = fp0b_1
	xma.l	fp1b_1 = uy, v1, fp1a_1
	xma.hu	fp2a_1 = uy, v1, fp1a_1
	;;
	ldf8	u_1 = [up], 8
	getfsig	pr1_0 = fp1b_0
	;;
	getfsig	acc1_0 = fp2a_0
	xma.l	fp0b_3 = u_3, v0, f0
	xma.hu	fp1a_3 = u_3, v0, f0
	br	L(11)


C Main loop.  One iteration is twelve instruction groups (numbered 00 to 11)
C and loads four u limbs and stores four result limbs.  The labels 01, 00, 11
C and 10 are the entry points used by the feed-in paths above.
C Carry detection: after sum = a + b + 1 the carry out is sum <= a (cmp.leu),
C after sum = a + b it is sum < a (cmp.ltu).  The predicate that selected the
C add also selects the matching compare.
C *** MAIN LOOP START ***
	ALIGN(32)
L(top):						C 00
	.pred.rel "mutex", p8, p9
	.pred.rel "mutex", p12, p13
	ldf8	u_3 = [up], 8
	getfsig	pr1_2 = fp1b_2
  (p8)	cmp.leu	p6, p7 = acc0, pr0_1
  (p9)	cmp.ltu	p6, p7 = acc0, pr0_1
  (p12)	cmp.leu	p10, p11 = s0, pr1_0
  (p13)	cmp.ltu	p10, p11 = s0, pr1_0
	;;					C 01
	.pred.rel "mutex", p6, p7
	getfsig	acc1_2 = fp2a_2
	st8	[rp] = s0, 8
	xma.l	fp0b_1 = u_1, v0, f0
  (p6)	add	acc0 = pr0_2, acc1_0, 1
  (p7)	add	acc0 = pr0_2, acc1_0
	xma.hu	fp1a_1 = u_1, v0, f0
	;;					C 02
L(01):
	.pred.rel "mutex", p10, p11
	getfsig	pr0_0 = fp0b_0
	xma.l	fp1b_0 = u_0, v1, fp1a_0
  (p10)	add	s0 = pr1_1, acc0, 1
  (p11)	add	s0 = pr1_1, acc0
	xma.hu	fp2a_0 = u_0, v1, fp1a_0
	nop	1
	;;					C 03
	.pred.rel "mutex", p6, p7
	.pred.rel "mutex", p10, p11
	ldf8	u_0 = [up], 8
	getfsig	pr1_3 = fp1b_3
  (p6)	cmp.leu	p8, p9 = acc0, pr0_2
  (p7)	cmp.ltu	p8, p9 = acc0, pr0_2
  (p10)	cmp.leu	p12, p13 = s0, pr1_1
  (p11)	cmp.ltu	p12, p13 = s0, pr1_1
	;;					C 04
	.pred.rel "mutex", p8, p9
	getfsig	acc1_3 = fp2a_3
	st8	[rp] = s0, 8
	xma.l	fp0b_2 = u_2, v0, f0
  (p8)	add	acc0 = pr0_3, acc1_1, 1
  (p9)	add	acc0 = pr0_3, acc1_1
	xma.hu	fp1a_2 = u_2, v0, f0
	;;					C 05
L(00):
	.pred.rel "mutex", p12, p13
	getfsig	pr0_1 = fp0b_1
	xma.l	fp1b_1 = u_1, v1, fp1a_1
  (p12)	add	s0 = pr1_2, acc0, 1
  (p13)	add	s0 = pr1_2, acc0
	xma.hu	fp2a_1 = u_1, v1, fp1a_1
	nop	1
	;;					C 06
	.pred.rel "mutex", p8, p9
	.pred.rel "mutex", p12, p13
	ldf8	u_1 = [up], 8
	getfsig	pr1_0 = fp1b_0
  (p8)	cmp.leu	p6, p7 = acc0, pr0_3
  (p9)	cmp.ltu	p6, p7 = acc0, pr0_3
  (p12)	cmp.leu	p10, p11 = s0, pr1_2
  (p13)	cmp.ltu	p10, p11 = s0, pr1_2
	;;					C 07
	.pred.rel "mutex", p6, p7
	getfsig	acc1_0 = fp2a_0
	st8	[rp] = s0, 8
	xma.l	fp0b_3 = u_3, v0, f0
  (p6)	add	acc0 = pr0_0, acc1_2, 1
  (p7)	add	acc0 = pr0_0, acc1_2
	xma.hu	fp1a_3 = u_3, v0, f0
	;;					C 08
L(11):
	.pred.rel "mutex", p10, p11
	getfsig	pr0_2 = fp0b_2
	xma.l	fp1b_2 = u_2, v1, fp1a_2
  (p10)	add	s0 = pr1_3, acc0, 1
  (p11)	add	s0 = pr1_3, acc0
	xma.hu	fp2a_2 = u_2, v1, fp1a_2
	nop	1
	;;					C 09
	.pred.rel "mutex", p6, p7
	.pred.rel "mutex", p10, p11
	ldf8	u_2 = [up], 8
	getfsig	pr1_1 = fp1b_1
  (p6)	cmp.leu	p8, p9 = acc0, pr0_0
  (p7)	cmp.ltu	p8, p9 = acc0, pr0_0
  (p10)	cmp.leu	p12, p13 = s0, pr1_3
  (p11)	cmp.ltu	p12, p13 = s0, pr1_3
	;;					C 10
	.pred.rel "mutex", p8, p9
	getfsig	acc1_1 = fp2a_1
	st8	[rp] = s0, 8
	xma.l	fp0b_0 = u_0, v0, f0
  (p8)	add	acc0 = pr0_1, acc1_3, 1
  (p9)	add	acc0 = pr0_1, acc1_3
	xma.hu	fp1a_0 = u_0, v0, f0
	;;					C 11
L(10):
	.pred.rel "mutex", p12, p13
	getfsig	pr0_3 = fp0b_3
	xma.l	fp1b_3 = u_3, v1, fp1a_3
  (p12)	add	s0 = pr1_0, acc0, 1
  (p13)	add	s0 = pr1_0, acc0
	xma.hu	fp2a_3 = u_3, v1, fp1a_3
	br.cloop.dptk L(top)
	;;
C *** MAIN LOOP END ***

C Wind-down.  Same add and compare pattern as the loop but without further u
C loads: the products still in flight are finished and summed.  cj5, cj4 and
C cj3 are the entry points for the feed-in paths that never ran the loop.
	.pred.rel "mutex", p8, p9
	.pred.rel "mutex", p12, p13
{.mmi;		getfsig	pr1_2 = fp1b_2
		st8	[rp] = s0, 8
	(p8)	cmp.leu	p6, p7 = acc0, pr0_1
}{.mmi;	(p9)	cmp.ltu	p6, p7 = acc0, pr0_1
	(p12)	cmp.leu	p10, p11 = s0, pr1_0
	(p13)	cmp.ltu	p10, p11 = s0, pr1_0
	;;
}	.pred.rel "mutex", p6, p7
{.mfi;		getfsig	acc1_2 = fp2a_2
		xma.l	fp0b_1 = u_1, v0, f0
		nop	1
}{.mmf;	(p6)	add	acc0 = pr0_2, acc1_0, 1
	(p7)	add	acc0 = pr0_2, acc1_0
		xma.hu	fp1a_1 = u_1, v0, f0
	;;
}
L(cj5):
	.pred.rel "mutex", p10, p11
{.mfi;		getfsig	pr0_0 = fp0b_0
		xma.l	fp1b_0 = u_0, v1, fp1a_0
	(p10)	add	s0 = pr1_1, acc0, 1
}{.mfi;	(p11)	add	s0 = pr1_1, acc0
		xma.hu	fp2a_0 = u_0, v1, fp1a_0
		nop	1
	;;
}	.pred.rel "mutex", p6, p7
	.pred.rel "mutex", p10, p11
{.mmi;		getfsig	pr1_3 = fp1b_3
		st8	[rp] = s0, 8
	(p6)	cmp.leu	p8, p9 = acc0, pr0_2
}{.mmi;	(p7)	cmp.ltu	p8, p9 = acc0, pr0_2
	(p10)	cmp.leu	p12, p13 = s0, pr1_1
	(p11)	cmp.ltu	p12, p13 = s0, pr1_1
	;;
}	.pred.rel "mutex", p8, p9
{.mfi;		getfsig	acc1_3 = fp2a_3
		xma.l	fp0b_2 = u_2, v0, f0
		nop	1
}{.mmf;	(p8)	add	acc0 = pr0_3, acc1_1, 1
	(p9)	add	acc0 = pr0_3, acc1_1
		xma.hu	fp1a_2 = u_2, v0, f0
	;;
}
L(cj4):
	.pred.rel "mutex", p12, p13
{.mfi;		getfsig	pr0_1 = fp0b_1
		xma.l	fp1b_1 = u_1, v1, fp1a_1
	(p12)	add	s0 = pr1_2, acc0, 1
}{.mfi;	(p13)	add	s0 = pr1_2, acc0
		xma.hu	fp2a_1 = u_1, v1, fp1a_1
		nop	1
	;;
}	.pred.rel "mutex", p8, p9
	.pred.rel "mutex", p12, p13
{.mmi;		getfsig	pr1_0 = fp1b_0
		st8	[rp] = s0, 8
	(p8)	cmp.leu	p6, p7 = acc0, pr0_3
}{.mmi;	(p9)	cmp.ltu	p6, p7 = acc0, pr0_3
	(p12)	cmp.leu	p10, p11 = s0, pr1_2
	(p13)	cmp.ltu	p10, p11 = s0, pr1_2
	;;
}	.pred.rel "mutex", p6, p7
{.mmi;		getfsig	acc1_0 = fp2a_0
	(p6)	add	acc0 = pr0_0, acc1_2, 1
	(p7)	add	acc0 = pr0_0, acc1_2
	;;
}
L(cj3):
	.pred.rel "mutex", p10, p11
{.mfi;		getfsig	pr0_2 = fp0b_2
		xma.l	fp1b_2 = u_2, v1, fp1a_2
	(p10)	add	s0 = pr1_3, acc0, 1
}{.mfi;	(p11)	add	s0 = pr1_3, acc0
		xma.hu	fp2a_2 = u_2, v1, fp1a_2
		nop	1
	;;
}	.pred.rel "mutex", p6, p7
	.pred.rel "mutex", p10, p11
{.mmi;		getfsig	pr1_1 = fp1b_1
		st8	[rp] = s0, 8
	(p6)	cmp.leu	p8, p9 = acc0, pr0_0
}{.mmi;	(p7)	cmp.ltu	p8, p9 = acc0, pr0_0
	(p10)	cmp.leu	p12, p13 = s0, pr1_3
	(p11)	cmp.ltu	p12, p13 = s0, pr1_3
	;;
}	.pred.rel "mutex", p8, p9
{.mmi;		getfsig	acc1_1 = fp2a_1
	(p8)	add	acc0 = pr0_1, acc1_3, 1
	(p9)	add	acc0 = pr0_1, acc1_3
	;;
}	.pred.rel "mutex", p12, p13
{.mmi;	(p12)	add	s0 = pr1_0, acc0, 1
	(p13)	add	s0 = pr1_0, acc0
		nop	1
	;;
}	.pred.rel "mutex", p8, p9
	.pred.rel "mutex", p12, p13
{.mmi;		getfsig	pr1_2 = fp1b_2
		st8	[rp] = s0, 8
	(p8)	cmp.leu	p6, p7 = acc0, pr0_1
}{.mmi;	(p9)	cmp.ltu	p6, p7 = acc0, pr0_1
	(p12)	cmp.leu	p10, p11 = s0, pr1_0
	(p13)	cmp.ltu	p10, p11 = s0, pr1_0
	;;
C r8 starts out as the high half of the last v1 product and picks up the
C final carry at the very end.
}	.pred.rel "mutex", p6, p7
{.mmi;		getfsig	r8 = fp2a_2
	(p6)	add	acc0 = pr0_2, acc1_0, 1
	(p7)	add	acc0 = pr0_2, acc1_0
	;;
}	.pred.rel "mutex", p10, p11
{.mmi;	(p10)	add	s0 = pr1_1, acc0, 1
	(p11)	add	s0 = pr1_1, acc0
	(p6)	cmp.leu	p8, p9 = acc0, pr0_2
	;;
}	.pred.rel "mutex", p10, p11
{.mmi;	(p7)	cmp.ltu	p8, p9 = acc0, pr0_2
	(p10)	cmp.leu	p12, p13 = s0, pr1_1
	(p11)	cmp.ltu	p12, p13 = s0, pr1_1
	;;
}	.pred.rel "mutex", p8, p9
{.mmi;		st8	[rp] = s0, 8
	(p8)	add	acc0 = pr1_2, acc1_1, 1
	(p9)	add	acc0 = pr1_2, acc1_1
	;;
C Last stored limb.  p10 collects the carry into r8 from two sources: the add
C just above, and the pending p12 carry that is applied by the +1 below.
}	.pred.rel "mutex", p8, p9
{.mmi;	(p8)	cmp.leu	p10, p11 = acc0, pr1_2
	(p9)	cmp.ltu	p10, p11 = acc0, pr1_2
	(p12)	add	acc0 = 1, acc0
	;;
C cmpeqor is not defined in this file (presumably an or-form equality compare
C from the shared m4 definitions -- confirm).  Here it is meant to also set
C p10 when the +1 wrapped acc0 around to zero.
}{.mmi;		st8	[rp] = acc0, 8
	(p12)	cmpeqor	p10, p0 = 0, acc0
		nop	1
	;;
}{.mib;	(p10)	add	r8 = 1, r8
		mov	ar.lc = r2
		br.ret.sptk.many b0
}
EPILOGUE()
ASM_END()