dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/mod_1_4.asm

dnl  PowerPC-64 mpn_mod_1s_4p

dnl  Copyright 2010, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          9
C POWER5                 9
C POWER6                13
C POWER7                3.5

C TODO
C  * Optimise, in particular the cps function.  This was compiler-generated and
C    then hand optimised.

C INPUT PARAMETERS
C   ap   pointer to the least significant limb of the operand
C   n    number of 64-bit limbs in the operand
C   d    divisor limb, as used by the final reduction step
C   cps  pointer to the table of seven 64-bit words written by
C        mpn_mod_1s_4p_cps further down in this file
define(`ap',	`r3')
define(`n',	`r4')
define(`d',	`r5')
define(`cps',	`r6')

ASM_START()

EXTERN_FUNC(mpn_invert_limb)

C mpn_mod_1s_4p (ap, n, d, cps)
C
C Folds the n-limb operand at ap into a two-limb accumulator, consuming
C four limbs per loop iteration, then reduces that accumulator with one
C multiply-by-inverse division step and returns the result in r3.
C
C Table words read from cps:
C    0(cps)  r10 at the end   inverse used by the final division step
C    8(cps)  r3  at the end   shift count (only the low 32 bits are read)
C   16(cps)  r26              multiplier for the 2nd lowest limb of a group
C   24(cps)  r25              multiplier for the 3rd lowest limb of a group
C   32(cps)  r24              multiplier for the 4th lowest limb of a group
C   40(cps)  r30              B4modb, multiplies the low accumulator limb
C   48(cps)  r23              B5modb, multiplies the high accumulator limb
C NOTE(review): by analogy with the B4modb/B5modb comments in the loop,
C offsets 16, 24 and 32 presumably hold B1modb, B2modb and B3modb.  Confirm
C against the C reference implementation.
C
C Accumulator: r9 is the high limb (rh), r0 is the low limb (rl).
C
C The function makes no calls.  It saves r23-r31 in the 72 bytes below r1
C without moving r1, and reloads them before returning.
PROLOGUE(mpn_mod_1s_4p)
	C Save the registers used as long-lived temporaries, interleaved with
	C the loads of the table constants.
	std	r23, -72(r1)
	ld	r23, 48(cps)
	std	r24, -64(r1)
	std	r25, -56(r1)
	ld	r24, 32(cps)
	ld	r25, 24(cps)
	std	r26, -48(r1)
	std	r27, -40(r1)
	ld	r26, 16(cps)
	std	r28, -32(r1)
	std	r29, -24(r1)
	std	r30, -16(r1)
	std	r31, -8(r1)
	ld	r30, 40(cps)

	rldicl.	r0, n, 0,62		C r0 = n mod 4, also sets cr0
	sldi	r31, n, 3		C r31 = n * 8 = operand size in bytes
	add	ap, ap, r31		C make ap point at end of operand

	C Dispatch on n mod 4.  The entry block absorbs the top 4, 1, 2 or 3
	C limbs so that the limbs left for the loop form whole groups of four.
	cmpdi	cr7, r0, 2
	beq	cr0, L(b00)		C n mod 4 = 0
	blt	cr7, L(b01)		C n mod 4 = 1
	beq	cr7, L(b10)		C n mod 4 = 2

C n mod 4 = 3: r9:r0 = ap[n-3] + ap[n-2] * r26 + ap[n-1] * r25
L(b11):	ld	r11, -16(ap)
	ld	r9, -8(ap)
	ld	r0, -24(ap)
	mulhdu	r27, r11, r26
	mulld	r8, r11, r26
	mulhdu	r11, r9, r25
	mulld	r9, r9, r25
	addc	r31, r8, r0
	addze	r10, r27
	addc	r0, r9, r31
	adde	r9, r11, r10
	addi	ap, ap, -40		C position ap for the loop loads
	b	L(6)

C n mod 4 = 0:
C r9:r0 = ap[n-4] + ap[n-3] * r26 + ap[n-2] * r25 + ap[n-1] * r24
	ALIGN(16)
L(b00):	ld	r11, -24(ap)
	ld	r10, -16(ap)
	ld	r9, -8(ap)
	ld	r0, -32(ap)
	mulld	r8, r11, r26
	mulhdu	r7, r10, r25
	mulhdu	r27, r11, r26
	mulhdu	r11, r9, r24
	mulld	r10, r10, r25
	mulld	r9, r9, r24
	addc	r31, r8, r0
	addze	r0, r27
	addc	r8, r31, r10
	adde	r10, r0, r7
	addc	r0, r9, r8
	adde	r9, r11, r10
	addi	ap, ap, -48		C position ap for the loop loads
	b	L(6)

C n mod 4 = 1: the accumulator is just the top limb, rh = 0
	ALIGN(16)
L(b01):	li	r9, 0
	ld	r0, -8(ap)
	addi	ap, ap, -24		C position ap for the loop loads
	b	L(6)

C n mod 4 = 2: the accumulator is the top two limbs, no multiply needed
	ALIGN(16)
L(b10):	ld	r9, -8(ap)
	ld	r0, -16(ap)
	addi	ap, ap, -32		C position ap for the loop loads

C CTR = (n + 3) / 4.  bdz decrements CTR before testing it, so the loop
C body below runs (n + 3) / 4 - 1 times; the entry block above already
C handled the first group.
	ALIGN(16)
L(6):	addi	r10, n, 3
	srdi	r7, r10, 2
	mtctr	r7
	bdz	L(end)

C Main loop.  Each pass loads the next four lower limbs, at -16(ap), -8(ap),
C 0(ap) and 8(ap) from lowest to highest, and forms the new two-limb
C accumulator
C   lowest + next * r26 + next * r25 + highest * r24 + rl * r30 + rh * r23
C The low halves are chained with addc and the high halves with addze/adde,
C so the order of the carry-setting instructions must not be changed.
	ALIGN(16)
L(top):	ld	r31, -16(ap)
	ld	r10, -8(ap)
	ld	r11, 8(ap)
	ld	r12, 0(ap)
	mulld	r29, r0, r30		C rl * B4modb
	mulhdu	r0, r0, r30		C rl * B4modb
	mulhdu	r27, r10, r26
	mulld	r10, r10, r26
	mulhdu	r7, r9, r23		C rh * B5modb
	mulld	r9, r9, r23		C rh * B5modb
	mulhdu	r28, r11, r24
	mulld	r11, r11, r24
	mulhdu	r4, r12, r25		C r4 is reused as scratch; n is dead here
	mulld	r12, r12, r25
	addc	r8, r10, r31
	addze	r10, r27
	addi	ap, ap, -32		C step down to the next group of four limbs
	addc	r27, r8, r12
	adde	r12, r10, r4
	addc	r11, r27, r11
	adde	r31, r12, r28
	addc	r12, r11, r29
	adde	r4, r31, r0
	addc	r0, r9, r12		C new rl
	adde	r9, r7, r4		C new rh
	bdnz	L(top)

C Final reduction of the accumulator r9:r0.
C The shift count is read as a 32-bit word from the 64-bit table entry at
C 8(cps), so the byte offset of its low half depends on the limb endianness.
L(end):
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	lwz	r3, 8(cps)',
`	lwz	r3, 12(cps)')
	C Fold rh once more: r9:r11 = rl + rh * r26
	mulld	r10, r9, r26
	mulhdu	r9, r9, r26
	addc	r11, r0, r10
	addze	r9, r9
	ld	r10, 0(cps)		C inverse stored by the cps function
	C Shift the two-limb value r9:r11 left by the count in r3
	subfic	r8, r3, 64		C r8 = 64 - count
	sld	r9, r9, r3
	srd	r8, r11, r8		C bits of the low limb that move into the high limb
	sld	r11, r11, r3
	or	r9, r8, r9
	C Quotient estimate from the inverse.  r0 receives the high limb of
	C r9 * inverse + ((r9 + 1) : r11), and r8 the low limb.
	C NOTE(review): this looks like the usual 2/1 division by a precomputed
	C inverse, which presumably needs d to be already shifted left by the
	C count.  Confirm against the callers.
	mulld	r0, r9, r10
	mulhdu	r10, r9, r10
	addi	r9, r9, 1
	addc	r8, r0, r11
	adde	r0, r10, r9
	mulld	r0, r0, d
	subf	r0, r0, r11		C candidate remainder = r11 - estimate * d
	cmpld	cr7, r8, r0
	bge	cr7, L(9)
	add	r0, r0, d		C first adjustment: add d back
L(9):	cmpld	cr7, r0, d
	bge-	cr7, L(16)		C hinted as unlikely: remainder still >= d
L(10):	srd	r3, r0, r3		C return value = remainder >> count
	ld	r23, -72(r1)
	ld	r24, -64(r1)
	ld	r25, -56(r1)
	ld	r26, -48(r1)
	ld	r27, -40(r1)
	ld	r28, -32(r1)
	ld	r29, -24(r1)
	ld	r30, -16(r1)
	ld	r31, -8(r1)
	blr

L(16):	subf	r0, d, r0		C second adjustment: subtract d
	b	L(10)
EPILOGUE()

C mpn_mod_1s_4p_cps (r3 = table pointer, r4 = divisor)
C
C Fills the seven-word table that mpn_mod_1s_4p reads through its cps
C argument:
C    0(r3)  value returned by mpn_invert_limb for the normalised divisor
C    8(r3)  count of leading zero bits of the divisor
C   16(r3) to 48(r3)  five chained values, each shifted right by the count.
C           The words at offsets 40 and 48 are the ones the main function
C           labels B4modb and B5modb.
C
C Register use across the call: r29 = table pointer, r30 = normalised
C divisor, r31 = count.  After the call r3 holds the inverse.
PROLOGUE(mpn_mod_1s_4p_cps,toc)
	mflr	r0
	std	r29, -24(r1)
	std	r30, -16(r1)
	mr	r29, r3			C keep the table pointer across the call
	std	r0, 16(r1)		C save the return address
	std	r31, -8(r1)
	stdu	r1, -144(r1)		C allocate a 144-byte stack frame
	cntlzd	r31, r4			C r31 = number of leading zeros of the divisor
	sld	r30, r4, r31		C r30 = divisor shifted left by that count
	mr	r3, r30
	CALL(	mpn_invert_limb)
	C First chained value:
	C r10 = -r30 * ((1 << count) | (inverse >> (64 - count)))
	subfic	r9, r31, 64
	li	r10, 1
	sld	r10, r10, r31
	srd	r9, r3, r9
	neg	r0, r30
	or	r10, r10, r9
	mulld	r10, r10, r0
	C Second value r11, derived from r10.  Each of the four steps below has
	C the same shape: r30 times the complement of (previous + high limb of
	C previous * inverse), then r30 is added once more unless the low limb
	C of previous * inverse is >= that product.
	mulhdu	r11, r10, r3
	nor	r11, r11, r11
	subf	r11, r10, r11		C complement minus previous, same value as above
	mulld	r11, r11, r30
	mulld	r0, r10, r3
	cmpld	cr7, r0, r11
	bge	cr7, L(18)
	add	r11, r11, r30
	C Third value r9, derived from r11
L(18):	mulhdu	r9, r11, r3
	add	r9, r11, r9
	nor	r9, r9, r9
	mulld	r9, r9, r30
	mulld	r0, r11, r3
	cmpld	cr7, r0, r9
	bge	cr7, L(19)
	add	r9, r9, r30
	C Fourth value r0, derived from r9
L(19):	mulhdu	r0, r9, r3
	add	r0, r9, r0
	nor	r0, r0, r0
	mulld	r0, r0, r30
	mulld	r8, r9, r3
	cmpld	cr7, r8, r0
	bge	cr7, L(20)
	add	r0, r0, r30
	C Fifth value r8, derived from r0
L(20):	mulhdu	r8, r0, r3
	add	r8, r0, r8
	nor	r8, r8, r8
	mulld	r8, r8, r30
	mulld	r7, r0, r3
	cmpld	cr7, r7, r8
	bge	cr7, L(21)
	add	r8, r8, r30
	C Shift every chained value right by the count and store the table.
	C The stack frame release and the register reloads are interleaved
	C with the stores.
L(21):	srd	r0, r0, r31
	addi	r1, r1, 144		C release the stack frame
	srd	r8, r8, r31
	srd	r10, r10, r31
	srd	r11, r11, r31
	std	r0, 40(r29)		C fourth value, stored before r0 is reused
	std	r31, 8(r29)		C count
	srd	r9, r9, r31
	ld	r0, 16(r1)		C reload the return address
	ld	r30, -16(r1)
	std	r8, 48(r29)		C fifth value
	std	r3, 0(r29)		C inverse
	mtlr	r0
	ld	r31, -8(r1)
	std	r10, 16(r29)		C first value
	std	r11, 24(r29)		C second value
	std	r9, 32(r29)		C third value
	ld	r29, -24(r1)
	blr
EPILOGUE()