dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/mul_1.asm

dnl  PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store
dnl  the result in a second limb vector.

dnl  Copyright 1999-2001, 2003-2006, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630     6-18
C POWER4/PPC970     7.25?  not updated for last file revision
C POWER5            7.25
C POWER6           14
C POWER7            2.9

C TODO
C  * Try to reduce the number of needed live registers (at least r5 and r10
C    could be combined)
C  * Optimize feed-in code, for speed and size.
C  * Clean up r12/r7 usage in feed-in code.

C INPUT PARAMETERS
C Aliases for the argument registers.  Note that the code below names r6
C directly instead of going through the vl alias, and that r5 (n) is reused
C as a scratch register once the loop count has been copied into ctr.
define(`rp', `r3')
define(`up', `r4')
define(`n',  `r5')
define(`vl', `r6')

C Two entry points share one body:
C   mpn_mul_1c  uses the limb passed in r7 as the initial carry limb
C   mpn_mul_1   uses an initial carry limb of zero
C
C Both multiply each of the n 64-bit limbs at up by the limb in r6, add the
C running carry, store the low 64 bits of each result at rp, and leave the
C final carry limb in r3 at L(ret) just before returning.
C
C Register roles inside the body:
C   r12              carry limb fed into the lowest addition of the next group
C   r26, r27         source limbs, loaded ahead of the multiplies that use them
C   r0, r7, r9, r11  low halves of products (mulld)
C   r5, r8, r10, r12 high halves of products (mulhdu)
C   ctr              (n + 3) >> 2
C   CA               carry bit chained from one adde to the next; the addi,
C                    load, store and branch instructions in between leave it
C                    untouched
C
C r26 and r27 are saved at negative offsets from r1 without moving r1 and are
C restored at L(ret).  NOTE(review): this assumes the area just below the
C stack pointer is safe to use here -- confirm against the target ABI.
C
C NOTE(review): n = 0 would dispatch to L(b00) with ctr = 0 and still process
C limbs; presumably callers guarantee n >= 1 -- confirm.

ASM_START()
PROLOGUE(mpn_mul_1c)
	std	r27, -40(r1)	C save r27 below the stack pointer
	std	r26, -48(r1)	C save r26 below the stack pointer
	mr	r12, r7		C carry limb = fifth argument register
	b	L(ent)		C join the shared body
EPILOGUE()
PROLOGUE(mpn_mul_1)
	std	r27, -40(r1)	C save r27 below the stack pointer
	std	r26, -48(r1)	C save r26 below the stack pointer
	li	r12, 0		C cy_limb = 0
L(ent):	ld	r26, 0(up)	C first source limb, needed on every path

	rldicl.	r0, n, 0,62	C r0 = n & 3, set cr0
	cmpdi	cr6, r0, 2	C cr6 = (n & 3) compared with 2
C addic, unlike addi, also writes CA.  NOTE(review): the L(b10) path reaches
C its first adde without any other CA-writing instruction, so it appears to
C depend on this addic leaving CA = 0 (true as long as n + 3 does not wrap).
	addic	n, n, 3		C compute count...
	srdi	n, n, 2		C ...for ctr
	mtctr	n		C copy count into ctr
	beq	cr0, L(b00)	C n & 3 == 0
	blt	cr6, L(b01)	C n & 3 == 1
	beq	cr6, L(b10)	C n & 3 == 2
				C otherwise n & 3 == 3, fall through

C n & 3 == 3: handle one limb here, leaving a multiple of four plus two.
L(b11):	mr	r7, r12		C move carry limb aside, r12 is overwritten next
	mulld	r0, r26, r6	C low half of product
	mulhdu	r12, r26, r6	C high half becomes the next carry limb
	addi	up, up, 8
	addc	r0, r0, r7	C add incoming carry limb, CA stays pending
	std	r0, 0(rp)
	addi	rp, rp, 8
	b	L(fic)

C n & 3 == 0: handle two limbs here, leaving a multiple of four plus two.
L(b00):	ld	r27, 8(up)
	addi	up, up, 16
	mulld	r0, r26, r6
	mulhdu	r5, r26, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	addc	r0, r0, r12	C limb 0: low product + carry limb
	adde	r7, r7, r5	C limb 1: low product + previous high + CA
	addze	r12, r8		C next carry limb = last high half + CA
	std	r0, 0(rp)
	std	r7, 8(rp)
	addi	rp, rp, 16
	b	L(fic)

	nop			C alignment
C n & 3 == 1: bdnz decrements ctr and branches when it is still nonzero,
C that is when n > 1.  Falling through means n == 1.
L(b01):	bdnz	L(gt1)
	mulld	r0, r26, r6
	mulhdu	r8, r26, r6	C high half goes in r8, which L(ret) reads
	addc	r0, r0, r12	C add carry limb, CA consumed at L(ret)
	std	r0, 0(rp)
	b	L(ret)
C n & 3 == 1 and n > 1: handle three limbs here, leaving a multiple of four
C plus two.
L(gt1):	ld	r27, 8(up)
	nop			C NOTE(review): presumably padding, like the nop above
	mulld	r0, r26, r6
	mulhdu	r5, r26, r6
	ld	r26, 16(up)	C third limb reuses r26 once the first is consumed
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	mulld	r9, r26, r6
	mulhdu	r10, r26, r6
	addc	r0, r0, r12	C limb 0: low product + carry limb
	adde	r7, r7, r5	C limb 1
	adde	r9, r9, r8	C limb 2
	addze	r12, r10	C next carry limb = last high half + CA
	std	r0, 0(rp)
	std	r7, 8(rp)
	std	r9, 16(rp)
	addi	up, up, 24
	addi	rp, rp, 24
	b	L(fic)

	nop			C NOTE(review): presumably padding, like the nop above
C Common feed-in: load the next pair of limbs.  L(b10) enters one instruction
C later because r26 was already loaded at L(ent).
L(fic):	ld	r26, 0(up)
L(b10):	ld	r27, 8(up)
	addi	up, up, 16
	bdz	L(end)		C decrement ctr, skip the loop when it reaches zero

C Main loop: four limbs per iteration.  On entry r26 and r27 hold the next
C two limbs, r12 holds the carry limb and CA holds the pending carry bit.
L(top):	mulld	r0, r26, r6
	mulhdu	r5, r26, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	ld	r26, 0(up)	C limbs 2 and 3 of this group
	ld	r27, 8(up)
	adde	r0, r0, r12	C limb 0: low product + carry limb + CA
	adde	r7, r7, r5	C limb 1
	mulld	r9, r26, r6
	mulhdu	r10, r26, r6
	mulld	r11, r27, r6
	mulhdu	r12, r27, r6	C high half of limb 3 is the next carry limb
	ld	r26, 16(up)	C preload the pair used by the next pass or L(end)
	ld	r27, 24(up)
	std	r0, 0(rp)
	adde	r9, r9, r8	C limb 2
	std	r7, 8(rp)
	adde	r11, r11, r10	C limb 3, CA carries into the next adde
	std	r9, 16(rp)
	addi	up, up, 32
	std	r11, 24(rp)

	addi	rp, rp, 32
	bdnz	L(top)		C decrement ctr, loop while nonzero

C Wind-down: the last two limbs are already in r26 and r27.
L(end):	mulld	r0, r26, r6
	mulhdu	r5, r26, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	adde	r0, r0, r12
	adde	r7, r7, r5
	std	r0, 0(rp)
	std	r7, 8(rp)
C Final carry limb = high half of the last product + CA, left in r3.
L(ret):	addze	r3, r8
	ld	r27, -40(r1)	C restore the registers saved at entry
	ld	r26, -48(r1)
	blr
EPILOGUE()