dnl  PowerPC-64 mpn_sec_tabselect.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          1.75
C POWER4/PPC970          2.0
C POWER5                 ?
C POWER6                 5.0
C POWER7                 1.75

C Overview
C
C The table at tp is walked as nents entries spaced n*8 bytes apart, each
C limb being one 8-byte doubleword.  n limbs are written to rp.  Every entry
C is read; each loaded limb is ANDed with a mask and ORed into an accumulator.
C The mask is all ones only on the pass over entry number which (counting
C from 0) and zero on every other pass, so the accumulators end up holding
C that entry.  If which is not below nents, no pass gets an all-ones mask and
C zeros are stored.
C
C The addresses loaded and the branches taken depend on n and nents only,
C never on which.
C
C The n limbs are handled as blocks of 4, then an optional block of 2 (bit 1
C of n), then an optional single limb (bit 0 of n).  Each block makes its own
C full sweep over all nents entries.
C
C NOTE(review): every sweep is counted with CTR and closed by bdnz, which
C decrements before testing, so nents is presumably required to be at least
C 1 -- confirm against the documented contract of the function.

C Incoming arguments.
define(`rp',      `r3')         C where the selected limbs are stored
define(`tp',      `r4')         C table cursor, advanced by entstep per entry
define(`n',       `r5')         C limbs per entry
define(`nents',   `r6')         C number of entries, loaded into CTR
define(`which',   `r7')         C number of the entry to extract

C Scratch registers.
define(`selcnt',  `r8')         C counts down from which, once per entry
define(`blkcnt',  `r9')         C limbs still to be covered by 4-limb blocks
define(`colbase', `r10')        C value of tp at the start of the current sweep
define(`selmask', `r11')        C 0 or all ones, recomputed for every entry
define(`entstep', `r12')        C n*8, byte distance between entries
define(`lmb0',    `r0')         C loaded limb, even slot
define(`lmb1',    `r27')        C loaded limb, odd slot
define(`acc0',    `r28')        C OR-accumulators for up to four result limbs
define(`acc1',    `r29')
define(`acc2',    `r30')
define(`acc3',    `r31')


ASM_START()
PROLOGUE(mpn_sec_tabselect)
        addic.  blkcnt, n, -4           C cr0 tells whether n-4 is negative
C r27-r31 are parked at negative offsets from r1 while r1 itself stays put.
C NOTE(review): this relies on the ABI leaving that area intact for a routine
C that makes no calls -- confirm for each supported PowerPC64 ABI.
        std     r31, -8(r1)
        std     r30, -16(r1)
        std     r29, -24(r1)
        std     r28, -32(r1)
        std     r27, -40(r1)
        sldi    entstep, n, 3           C limbs to bytes

        blt     cr0, L(quad_end)        C fewer than 4 limbs, skip the block loop

C ---- blocks of four limbs ----
L(quad_top):
        mtctr   nents                   C one pass per table entry
        mr      colbase, tp
        li      acc0, 0
        li      acc1, 0
        li      acc2, 0
        li      acc3, 0
        addic.  blkcnt, blkcnt, -4      C cr0 is consumed by the bge below; nothing
                                        C in the entry loop writes cr0
        mr      selcnt, which

        ALIGN(16)
L(quad_ent):
        addic   selcnt, selcnt, -1      C carry is clear only if selcnt was 0
        subfe   selmask, selmask, selmask       C carry clear gives -1, carry set gives 0
        ld      lmb0, 0(tp)
        ld      lmb1, 8(tp)
        and     lmb0, lmb0, selmask
        and     lmb1, lmb1, selmask
        or      acc0, acc0, lmb0
        or      acc1, acc1, lmb1
        ld      lmb0, 16(tp)
        ld      lmb1, 24(tp)
        and     lmb0, lmb0, selmask
        and     lmb1, lmb1, selmask
        or      acc2, acc2, lmb0
        or      acc3, acc3, lmb1
        add     tp, tp, entstep         C same limbs, next entry
        bdnz    L(quad_ent)

        std     acc0, 0(rp)
        std     acc1, 8(rp)
        std     acc2, 16(rp)
        std     acc3, 24(rp)
        addi    tp, colbase, 32         C back to the first entry, 4 limbs further in
        addi    rp, rp, 32
        bge     cr0, L(quad_top)        C another full block of 4 remains
L(quad_end):

C ---- optional block of two limbs ----
        rldicl. lmb0, n, 63, 63         C isolate bit 1 of n
        beq     cr0, L(pair_end)
        mtctr   nents
        mr      colbase, tp
        li      acc0, 0
        li      acc1, 0
        mr      selcnt, which
        ALIGN(16)
L(pair_ent):
        addic   selcnt, selcnt, -1      C same mask construction as in the 4-limb loop
        subfe   selmask, selmask, selmask
        ld      lmb0, 0(tp)
        ld      lmb1, 8(tp)
        and     lmb0, lmb0, selmask
        and     lmb1, lmb1, selmask
        or      acc0, acc0, lmb0
        or      acc1, acc1, lmb1
        add     tp, tp, entstep
        bdnz    L(pair_ent)
        std     acc0, 0(rp)
        std     acc1, 8(rp)
        addi    tp, colbase, 16         C back to the first entry, 2 limbs further in
        addi    rp, rp, 16

C ---- optional single limb ----
L(pair_end):
        rldicl. lmb0, n, 0, 63          C isolate bit 0 of n
        beq     cr0, L(done)
        mtctr   nents
        mr      colbase, tp
        li      acc0, 0
        mr      selcnt, which
        ALIGN(16)
L(one_ent):
        addic   selcnt, selcnt, -1
        subfe   selmask, selmask, selmask
        ld      lmb0, 0(tp)
        and     lmb0, lmb0, selmask
        or      acc0, acc0, lmb0
        add     tp, tp, entstep
        bdnz    L(one_ent)
        std     acc0, 0(rp)

C ---- restore the registers saved on entry and return ----
L(done):
        ld      r31, -8(r1)
        ld      r30, -16(r1)
        ld      r29, -24(r1)
        ld      r28, -32(r1)
        ld      r27, -40(r1)
        blr
EPILOGUE()