dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/aorsmul_1.asm

dnl  PowerPC-64 mpn_addmul_1 and mpn_submul_1.

dnl  Copyright 1999-2001, 2003-2006, 2010-2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C                   mpn_addmul_1    mpn_submul_1
C                   cycles/limb     cycles/limb
C POWER3/PPC630         6-18            6-18
C POWER4/PPC970          8               8.3
C POWER5                 8               8.25
C POWER6                16.25           16.75
C POWER7                 3.77            4.9

C TODO
C  * Try to reduce the number of needed live registers
C  * Add support for _1c entry points

C INPUT PARAMETERS
C   rp  destination limb vector, read and written
C   up  source limb vector, read only
C   n   limb count; its register (r5) is reused as scratch after mtctr
C   vl  multiplier limb; the code below refers to it directly as r6
define(`rp', `r3')
define(`up', `r4')
define(`n',  `r5')
define(`vl', `r6')

C OPERATION SELECTION
C Each ifdef below takes effect only when its OPERATION_ symbol is defined.
C   ADDSUB   carry-setting add (addc) or subtract-from (subfc), no carry-in
C   ADDSUBC  the same with carry-in from CA (adde or subfe)
C   func     name of the entry point emitted by PROLOGUE
C   func_nc  carry-in variant name; defined but not used in this file
C   SM(x)    expands to x for submul and to nothing for addmul; it wraps the
C            instructions that only the subtract variant needs
ifdef(`OPERATION_addmul_1',`
  define(ADDSUBC,	adde)
  define(ADDSUB,	addc)
  define(func,		mpn_addmul_1)
  define(func_nc,	mpn_addmul_1c)	C FIXME: not really supported
  define(SM,		`')
')
ifdef(`OPERATION_submul_1',`
  define(ADDSUBC,	subfe)
  define(ADDSUB,	subfc)
  define(func,		mpn_submul_1)
  define(func_nc,	mpn_submul_1c)	C FIXME: not really supported
  define(SM,		`$1')
')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

C func (rp, up, n, vl)
C
C Multiplies each of the n limbs at up by the limb in r6 and adds the
C products to (addmul) or subtracts them from (submul) the limbs at rp,
C storing the result back at rp.  The value returned in r3 is the high limb
C of the last product plus the final carry (addmul) or borrow (submul).
C
C Structure
C  * ctr = (n + 3) >> 2.  n mod 4 picks a lead-in that handles the odd limbs:
C      L(b11)  n mod 4 = 3   one limb
C      L(b00)  n mod 4 = 0   two limbs
C      L(b01)  n mod 4 = 1   the only limb when n = 1, else three limbs
C      L(b10)  n mod 4 = 2   no limbs, only clears the carry state
C  * L(top) handles four limbs per iteration and L(end) always handles the
C    last two limbs.
C  * r9 and r27 hold the next two limbs of up, loaded before they are used.
C  * r12 carries the pending high product limb from one group of limbs into
C    the next, and the CA bit carries the add/subtract carry alongside it.
C  * For submul the subfe/addic pair wrapped in SM() turns CA as left by a
C    subtract chain (1 = no borrow) into CA = 1 on borrow, so that the next
C    adde chain adds the borrow into the products.
C
C r27-r31 are saved at negative offsets from r1 and the stack pointer is not
C moved; this presumably relies on the ABI allowing a leaf function to store
C below r1.  n is assumed to be nonzero: with n = 0 the L(b00) path would
C still process limbs and the loop counter would wrap.

ASM_START()
PROLOGUE(func)
	C Save the nonvolatile registers used below, interleaved with the
	C computation of n mod 4 and of the loop count.
	std	r31, -8(r1)
	rldicl.	r0, n, 0,62	C r0 = n & 3, set cr0
	std	r30, -16(r1)
	cmpdi	cr6, r0, 2
	std	r29, -24(r1)
	addi	n, n, 3		C compute count...
	std	r28, -32(r1)
	srdi	n, n, 2		C ...for ctr
	std	r27, -40(r1)
	mtctr	n		C copy count into ctr
	beq	cr0, L(b00)	C n mod 4 = 0
	blt	cr6, L(b01)	C n mod 4 = 1
	beq	cr6, L(b10)	C n mod 4 = 2

	C n mod 4 = 3: handle one limb, leave its high product limb in r12
	C and preload the next two limbs of up.
L(b11):	ld	r9, 0(up)
	ld	r28, 0(rp)
	mulld	r0, r9, r6
	mulhdu	r12, r9, r6
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	addi	rp, rp, 8
	ld	r9, 8(up)
	ld	r27, 16(up)
	addi	up, up, 24
SM(`	subfe	r11, r11, r11 ')
	b	L(bot)

	C n mod 4 = 0: handle two limbs.  The product chain is summed with
	C addc/addze first, so r12 holds the carry-out limb before the
	C ADDSUB chain overwrites CA.
	ALIGN(16)
L(b00):	ld	r9, 0(up)
	ld	r27, 8(up)
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	addc	r7, r7, r5
	addze	r12, r8
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
	addi	rp, rp, 16
	ld	r9, 16(up)
	ld	r27, 24(up)
	addi	up, up, 32
SM(`	subfe	r11, r11, r11 ')
	b	L(bot)

	C n mod 4 = 1: bdnz decrements ctr and branches when more than one
	C limb remains.  The fall-through case is n = 1; it touches only
	C r0, r8, r9 and r11, so it returns without reloading r27-r31.
	ALIGN(16)
L(b01):	bdnz	L(gt1)
	ld	r9, 0(up)
	ld	r11, 0(rp)
	mulld	r0, r9, r6
	mulhdu	r8, r9, r6
	ADDSUB	r0, r0, r11
	std	r0, 0(rp)
SM(`	subfe	r11, r11, r11 ')
SM(`	addic	r11, r11, 1 ')
	addze	r3, r8		C return high limb + carry/borrow
	blr
	C n = 5, 9, ...: handle three limbs, then join the common code.
L(gt1):	ld	r9, 0(up)
	ld	r27, 8(up)
	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	ld	r9, 16(up)
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	ld	r30, 16(rp)
	mulld	r11, r9, r6
	mulhdu	r10, r9, r6
	addc	r7, r7, r5
	adde	r11, r11, r8
	addze	r12, r10
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
	ADDSUBC	r11, r11, r30
	std	r11, 16(rp)
	addi	rp, rp, 24
	ld	r9, 24(up)
	ld	r27, 32(up)
	addi	up, up, 40
SM(`	subfe	r11, r11, r11 ')
	b	L(bot)

	C n mod 4 = 2: nothing to handle up front.  Adding 0 leaves r0
	C unchanged and clears CA.  bdz goes straight to L(end) when n = 2.
L(b10):	addic	r0, r0, 0
	li	r12, 0		C cy_limb = 0
	ld	r9, 0(up)
	ld	r27, 8(up)
	bdz	L(end)
	addi	up, up, 16

	C Main loop, four limbs per iteration.  On entry r9 and r27 hold two
	C preloaded limbs of up, r12 the pending high limb, and CA the pending
	C carry.  The trailing numbers in the comments below appear to list
	C the registers each instruction involves.
	ALIGN(16)
L(top):	mulld	r0, r9, r6
	mulhdu	r5, r9, r6	C 9
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6	C 27
	ld	r9, 0(up)
	ld	r28, 0(rp)
	ld	r27, 8(up)
	ld	r29, 8(rp)
	adde	r0, r0, r12	C 0 12
	adde	r7, r7, r5	C 5 7
	mulld	r5, r9, r6
	mulhdu	r10, r9, r6	C 9
	mulld	r11, r27, r6
	mulhdu	r12, r27, r6	C 27
	ld	r9, 16(up)
	ld	r30, 16(rp)
	ld	r27, 24(up)
	ld	r31, 24(rp)
	adde	r5, r5, r8	C 8 5
	adde	r11, r11, r10	C 10 11
	addze	r12, r12	C 12
	ADDSUB	r0, r0, r28	C 0 28
	std	r0, 0(rp)	C 0
	ADDSUBC	r7, r7, r29	C 7 29
	std	r7, 8(rp)	C 7
	ADDSUBC	r5, r5, r30	C 5 30
	std	r5, 16(rp)	C 5
	ADDSUBC	r11, r11, r31	C 11 31
	std	r11, 24(rp)	C 11
	addi	up, up, 32
SM(`	subfe	r11, r11, r11 ')
	addi	rp, rp, 32
	C Common join point for the lead-ins.  For submul the addic completes
	C the CA inversion started by the preceding subfe.
L(bot):
SM(`	addic	r11, r11, 1 ')
	bdnz	L(top)

	C Tail: the last two limbs, already in r9 and r27.
L(end):	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	adde	r0, r0, r12
	adde	r7, r7, r5
	addze	r8, r8
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
SM(`	subfe	r11, r11, r11 ')
SM(`	addic	r11, r11, 1 ')
	addze	r3, r8		C return high limb + carry/borrow
	ld	r31, -8(r1)
	ld	r30, -16(r1)
	ld	r29, -24(r1)
	ld	r28, -32(r1)
	ld	r27, -40(r1)
	blr
EPILOGUE()