dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/aors_n.asm

dnl  PowerPC-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1999-2001, 2003-2005, 2007, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		   cycles/limb
C POWER3/PPC630		 1.5
C POWER4/PPC970		 2
C POWER5		 2
C POWER6		 2.63
C POWER7		 2.25-2.87

C This code is a little bit slower for POWER3/PPC630 than the simple code used
C previously, but it is much faster for POWER4/PPC970.  The reason for the
C POWER3/PPC630 slowdown can be attributed to the saving and restoring of 4
C registers.
C INPUT PARAMETERS
C rp	r3
C up	r4
C vp	r5
C n	r6
C
C The func_nc entry point additionally reads r7 through SETCBR(r7) and uses
C it as the incoming carry (add) or borrow (sub).
C
C Return value (r3): the carry or borrow out of the most significant limb,
C produced as 0 or 1 by the subfe/GENRVAL pair at L(ret).
C
C Registers written: r0, r3-r12, ctr, cr0, cr6, CA.  r28-r31 are saved to and
C restored from the 32 bytes immediately below r1; r1 itself is never moved,
C so the code relies on that area being usable by a leaf routine.
C
C NOTE(review): n = 0 is not handled.  It would load ctr with 0, and the bdz
C at L(go) would then decrement past zero instead of exiting.  Callers are
C presumably required to pass n >= 1 -- confirm against the mpn interface.

C Operation-specific macros, selected by OPERATION_add_n or OPERATION_sub_n.
C   ADDSUBC	one-limb add/subtract that consumes and produces CA
C		(adde or subfe).
C   ADDSUB	form that only produces CA (addc or subfc).  Defined here but
C		not referenced anywhere in the visible code.
C   func	plain entry point name.
C   func_nc	entry point name for the variant with a carry/borrow input.
C   GENRVAL	converts r3 = CA - 1 (as left by subfe r3,r0,r0) into the
C		return value: add maps {0,-1} to {1,0}; sub maps {0,-1} to
C		{0,1}.
C   SETCBR(r)	loads CA from register r.  add: r + (-1) carries exactly when
C		r is nonzero.  sub: 0 - r leaves CA set (no borrow) exactly
C		when r is zero.
C   CLRCB	puts CA in its neutral state.  add: adding 0 never carries,
C		so CA = 0.  sub: r1 + (-1) carries whenever r1 is nonzero, so
C		CA = 1 (no borrow) -- this assumes r1, the stack pointer, is
C		never zero.
C Both SETCBR and CLRCB use r0 only as a throwaway destination.

ifdef(`OPERATION_add_n',`
  define(ADDSUBC,	adde)
  define(ADDSUB,	addc)
  define(func,		mpn_add_n)
  define(func_nc,	mpn_add_nc)
  define(GENRVAL,	`addi	r3, r3, 1')
  define(SETCBR,	`addic	r0, $1, -1')
  define(CLRCB,		`addic	r0, r0, 0')
')
ifdef(`OPERATION_sub_n',`
  define(ADDSUBC,	subfe)
  define(ADDSUB,	subfc)
  define(func,		mpn_sub_n)
  define(func_nc,	mpn_sub_nc)
  define(GENRVAL,	`neg	r3, r3')
  define(SETCBR,	`subfic	r0, $1, 0')
  define(CLRCB,		`addic	r0, r1, -1')
')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ASM_START()

C func_nc: seed CA from the carry/borrow argument in r7, then join the common
C body at L(ent), which lives inside func below.
PROLOGUE(func_nc)
	SETCBR(r7)
	b	L(ent)
EPILOGUE()

C func: seed CA with "no carry / no borrow" and fall into the common body.
PROLOGUE(func)
	CLRCB

C Common body.  From here until the final subfe at L(ret), CA carries the
C running carry/borrow from limb to limb.  Only ADDSUBC is meant to change
C it; the stores, loads, compares, addi/srdi, mtctr and branches in between
C leave CA alone.
L(ent):	std	r31, -8(r1)
	std	r30, -16(r1)
	std	r29, -24(r1)
	std	r28, -32(r1)

C ctr = (n + 3) >> 2 = number of groups of up to four limbs.  The n & 3
C leftover limbs, if any, form the first group and are handled by one of
C L(b01), L(b10), L(b11) before the 4-limb code is entered.
	rldicl.	r0, r6, 0,62	C r0 = n & 3, set cr0
	cmpdi	cr6, r0, 2
	addi	r6, r6, 3	C compute count...
	srdi	r6, r6, 2	C ...for ctr
	mtctr	r6		C copy count into ctr
	beq	cr0, L(b00)	C n & 3 == 0: no leftover limbs
	blt	cr6, L(b01)	C n & 3 == 1
	beq	cr6, L(b10)	C n & 3 == 2
				C otherwise n & 3 == 3, fall through

C Three leftover limbs.  ADDSUBC takes the s2 limb as its first source and
C the s1 limb as its second, which for subfe yields s1 - s2.
L(b11):	ld	r8, 0(r4)	C load s1 limb
	ld	r9, 0(r5)	C load s2 limb
	ld	r10, 8(r4)	C load s1 limb
	ld	r11, 8(r5)	C load s2 limb
	ld	r12, 16(r4)	C load s1 limb
	addi	r4, r4, 24
	ld	r0, 16(r5)	C load s2 limb
	addi	r5, r5, 24
	ADDSUBC	r29, r9, r8
	ADDSUBC	r30, r11, r10
	ADDSUBC	r31, r0, r12
	std	r29, 0(r3)
	std	r30, 8(r3)
	std	r31, 16(r3)
	addi	r3, r3, 24
	bdnz	L(go)		C more groups remain: continue with 4-limb code
	b	L(ret)		C that was the only group

C One leftover limb.
L(b01):	ld	r12, 0(r4)	C load s1 limb
	addi	r4, r4, 8
	ld	r0, 0(r5)	C load s2 limb
	addi	r5, r5, 8
	ADDSUBC	r31, r0, r12	C add
	std	r31, 0(r3)
	addi	r3, r3, 8
	bdnz	L(go)		C more groups remain: continue with 4-limb code
	b	L(ret)		C that was the only group

C Two leftover limbs.
L(b10):	ld	r10, 0(r4)	C load s1 limb
	ld	r11, 0(r5)	C load s2 limb
	ld	r12, 8(r4)	C load s1 limb
	addi	r4, r4, 16
	ld	r0, 8(r5)	C load s2 limb
	addi	r5, r5, 16
	ADDSUBC	r30, r11, r10	C add
	ADDSUBC	r31, r0, r12	C add
	std	r30, 0(r3)
	std	r31, 8(r3)
	addi	r3, r3, 16
	bdnz	L(go)		C more groups remain: continue with 4-limb code
	b	L(ret)		C that was the only group

C 4-limb code.  L(go) preloads one group of four limb pairs.  If that group
C is the last one, bdz skips straight to L(end), which only computes and
C stores.  Otherwise L(top) processes the preloaded group while loading the
C next one, so every pass ends with a fresh group waiting in r6-r12 and r0.
L(b00):	C INITCY		C clear/set cy
L(go):	ld	r6, 0(r4)	C load s1 limb
	ld	r7, 0(r5)	C load s2 limb
	ld	r8, 8(r4)	C load s1 limb
	ld	r9, 8(r5)	C load s2 limb
	ld	r10, 16(r4)	C load s1 limb
	ld	r11, 16(r5)	C load s2 limb
	ld	r12, 24(r4)	C load s1 limb
	ld	r0, 24(r5)	C load s2 limb
	bdz	L(end)		C last group: nothing more to load

	addi	r4, r4, 32
	addi	r5, r5, 32

	ALIGN(16)
L(top):	ADDSUBC	r28, r7, r6
	ld	r6, 0(r4)	C load s1 limb
	ld	r7, 0(r5)	C load s2 limb
	ADDSUBC	r29, r9, r8
	ld	r8, 8(r4)	C load s1 limb
	ld	r9, 8(r5)	C load s2 limb
	ADDSUBC	r30, r11, r10
	ld	r10, 16(r4)	C load s1 limb
	ld	r11, 16(r5)	C load s2 limb
	ADDSUBC	r31, r0, r12
	ld	r12, 24(r4)	C load s1 limb
	ld	r0, 24(r5)	C load s2 limb
	std	r28, 0(r3)
	addi	r4, r4, 32
	std	r29, 8(r3)
	addi	r5, r5, 32
	std	r30, 16(r3)
	std	r31, 24(r3)
	addi	r3, r3, 32
	bdnz	L(top)		C decrement ctr and loop back

C Drain: compute and store the final preloaded group.
L(end):	ADDSUBC	r28, r7, r6
	ADDSUBC	r29, r9, r8
	ADDSUBC	r30, r11, r10
	ADDSUBC	r31, r0, r12
	std	r28, 0(r3)
	std	r29, 8(r3)
	std	r30, 16(r3)
	std	r31, 24(r3)

C Restore the saved registers, then turn the final CA into the return value.
C subfe r3,r0,r0 computes ~r0 + r0 + CA = CA - 1 whatever r0 holds; GENRVAL
C maps that to 0 or 1 for the selected operation.
L(ret):	ld	r31, -8(r1)
	ld	r30, -16(r1)
	ld	r29, -24(r1)
	ld	r28, -32(r1)

	subfe	r3, r0, r0	C -cy
	GENRVAL
	blr
EPILOGUE()