dnl  PowerPC-32 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
dnl  the result from a second limb vector.

dnl  Copyright 1995, 1997, 1998, 2000, 2002, 2005 Free Software Foundation,
dnl  Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                cycles/limb
C 603e:            ?
C 604e:            7.5
C 75x (G3):        9.3-15
C 7400,7410 (G4):  9.3-15
C 744x,745x (G4+): 10.5
C power4/ppc970:   6.75
C power5:          6.5

C INPUT PARAMETERS
C rp	r3
C up	r4
C n	r5
C vl	r6

C RETURN VALUE
C r3	final carry limb: the high word of the last product plus the
C	outstanding carry/borrow (computed by the addze before each blr)

C This is optimized for the PPC604.  See addmul_1.asm for additional comments.

C OPERATION
C For each of the n limbs: form the 64-bit product up[i]*vl, add the running
C carry limb to its low word, subtract that from rp[i] and store the result
C back to rp[i].  The high word of the product, plus any carry out of the
C addition, becomes the carry limb for the next position.
C
C The first limb is always processed before the count is tested, so n is
C assumed to be at least 1.
C
C CARRY HANDLING
C subfc/subfe leave CA = 1 when the subtraction did NOT borrow, whereas the
C adde/addze that consume CA at the next limb need CA = 1 when it DID borrow.
C The code therefore inverts CA after each subtraction (or chain of
C subtractions):
C   - single limb:  addc junk,prod,diff   re-adds what was subtracted; this
C     wraps (CA = 1) exactly when the subtraction borrowed.
C   - 4-limb chain: subfe r11,r11,r11 gives CA-1 (0 or -1), and adding 1 with
C     addic carries out only for -1, i.e. only when the chain borrowed.
C mullw, mulhwu, loads, stores, mtctr, andi. and the bdz/bdnz branches do not
C modify CA, which is what allows them to be interleaved with the carry chain.
C
C Two code paths:
C   n <= 9   simple one-limb-per-iteration loop, no stack frame.
C   n >  9   first limb peeled, then a 4-way unrolled loop over (n-1)/4
C            groups, then a one-limb loop for the (n-1) mod 4 leftovers.
C            Uses r30/r31 as extra scratch, saved in a 16-byte frame.

ASM_START()
PROLOGUE(mpn_submul_1)
	cmpwi	cr0,r5,9	C more than 9 limbs?
	bgt	cr0,L(big)	C branch if more than 9 limbs

C ---- small path: r0 = up limb, r9 = rp limb, r8 = result limb,
C ---- r10 = carry limb, r7 = low product (later junk)
	mtctr	r5		C ctr = n
	lwz	r0,0(r4)	C r0 = up[0]
	mullw	r7,r0,r6	C r7 = low word of up[0]*vl
	mulhwu	r10,r0,r6	C r10 = high word of up[0]*vl
	lwz	r9,0(r3)	C r9 = rp[0]
	subfc	r8,r7,r9	C r8 = rp[0] - r7, CA = no-borrow
	addc	r7,r7,r8	C invert cy (r7 is junk)
	addi	r3,r3,-4	C bias rp so that 4(r3) is the pending limb
	bdz	L(end)		C n was 1: only the store remains
L(loop):
	lwzu	r0,4(r4)	C advance up, r0 = next up limb
	stwu	r8,4(r3)	C store previous result limb, advance rp
	mullw	r8,r0,r6	C r8 = low word of product
	adde	r7,r8,r10	C r7 = low + carry limb + borrow
	mulhwu	r10,r0,r6	C r10 = high word of product
	lwz	r9,4(r3)	C r9 = next rp limb
	addze	r10,r10		C fold carry from the adde into the carry limb
	subfc	r8,r7,r9	C r8 = rp limb - r7, CA = no-borrow
	addc	r7,r7,r8	C invert cy (r7 is junk)
	bdnz	L(loop)
L(end):	stw	r8,4(r3)	C store the last result limb
	addze	r3,r10		C return carry limb + borrow
	blr

C ---- big path: r0 = carry limb, r7/r12/r30/r31 = up limbs, then rp limbs,
C ---- then result limbs; r8-r11 = low product words
L(big):	stwu	r1,-16(r1)	C open a 16-byte frame, old r1 stored at 0(r1)
	addi	r5,r5,-1	C r5 = n-1, limbs left after the peeled first one
	stw	r30,8(r1)	C save r30, used as scratch in the unrolled loop
	srwi	r0,r5,2		C r0 = (n-1)/4, at least 2 since n > 9
	stw	r31,12(r1)	C save r31, used as scratch in the unrolled loop
	mtctr	r0		C ctr = number of 4-limb groups

C Peeled first limb.
	lwz	r7,0(r4)	C r7 = up[0]
	mullw	r8,r7,r6	C r8 = low word of product
	mulhwu	r0,r7,r6	C r0 = high word, the initial carry limb
	lwz	r7,0(r3)	C r7 = rp[0]
	subfc	r7,r8,r7	C r7 = rp[0] - r8, CA = no-borrow
	addc	r8,r8,r7	C invert cy (r8 is junk)
	stw	r7,0(r3)	C rp[0] = result

C Four limbs per iteration.  On entry r0 = carry limb and CA = borrow.
L(loopU):
	lwz	r7,4(r4)	C load the next four up limbs ...
	lwz	r12,8(r4)
	lwz	r30,12(r4)
	lwzu	r31,16(r4)	C ... and advance up by four limbs
	mullw	r8,r7,r6	C low product words for the four limbs
	mullw	r9,r12,r6
	mullw	r10,r30,r6
	mullw	r11,r31,r6
	adde	r8,r8,r0	C add cy_limb
	mulhwu	r0,r7,r6	C r0 = high word of limb 1
	lwz	r7,4(r3)	C r7 = rp limb 1
	adde	r9,r9,r0	C low 2 + high 1 + carry
	mulhwu	r0,r12,r6	C r0 = high word of limb 2
	lwz	r12,8(r3)	C r12 = rp limb 2
	adde	r10,r10,r0	C low 3 + high 2 + carry
	mulhwu	r0,r30,r6	C r0 = high word of limb 3
	lwz	r30,12(r3)	C r30 = rp limb 3
	adde	r11,r11,r0	C low 4 + high 3 + carry
	mulhwu	r0,r31,r6	C r0 = high word of limb 4
	lwz	r31,16(r3)	C r31 = rp limb 4
	addze	r0,r0		C new cy_limb
	subfc	r7,r8,r7	C start the borrow chain: rp limb 1 - r8
	stw	r7,4(r3)
	subfe	r12,r9,r12	C rp limb 2 - r9 - borrow
	stw	r12,8(r3)
	subfe	r30,r10,r30	C rp limb 3 - r10 - borrow
	stw	r30,12(r3)
	subfe	r31,r11,r31	C rp limb 4 - r11 - borrow
	stwu	r31,16(r3)	C store and advance rp by four limbs
	subfe	r11,r11,r11	C invert ...
	addic	r11,r11,1	C ... carry
	bdnz	L(loopU)

	andi.	r31,r5,3	C r31 = (n-1) mod 4 leftover limbs, sets cr0
	mtctr	r31
	beq	cr0,L(endx)	C nothing left over

C Leftover limbs, one per iteration.  r0 = carry limb and CA = borrow.
L(loopE):
	lwzu	r7,4(r4)	C advance up, r7 = next up limb
	mullw	r8,r7,r6	C r8 = low word of product
	adde	r8,r8,r0	C add cy_limb
	mulhwu	r0,r7,r6	C r0 = high word of product
	lwz	r7,4(r3)	C r7 = next rp limb
	addze	r0,r0		C new cy_limb
	subfc	r7,r8,r7	C r7 = rp limb - r8, CA = no-borrow
	addc	r8,r8,r7	C invert cy (r8 is junk)
	stwu	r7,4(r3)	C store result, advance rp
	bdnz	L(loopE)
L(endx):
	addze	r3,r0		C return carry limb + borrow
	lwz	r30,8(r1)	C restore the saved registers
	lwz	r31,12(r1)
	addi	r1,r1,16	C release the frame
	blr
EPILOGUE(mpn_submul_1)