dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/mod_1_1.asm

dnl  PowerPC-64 mpn_mod_1_1p

dnl  Copyright 2010, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970         17
C POWER5                16
C POWER6                30
C POWER7                10.2

C TODO
C  * Optimise, in particular the cps function.  This was compiler-generated and
C    then hand optimised.
C INPUT PARAMETERS
define(`ap', `r3')
define(`n', `r4')
define(`d', `r5')
define(`cps', `r6')

ASM_START()

EXTERN_FUNC(mpn_invert_limb)

C mpn_mod_1_1p
C
C Register arguments (see the defines above): r3 = ap, r4 = n, r5 = d,
C r6 = cps.  The result is returned in r3.
C
C Words read from the table at r6:
C    0(r6)  inv, the multiplier used in the final reduction (stored by
C           mpn_mod_1_1p_cps from the value mpn_invert_limb returned)
C    8(r6)  cnt, a shift count (stored as a doubleword, read as its low word)
C   16(r6)  B1modb
C   24(r6)  B2modb
C
C The limbs ap[n-1] and ap[n-2] are loaded unconditionally, so n must be at
C least 2.
C NOTE(review): d is presumably the divisor already shifted left by cnt, since
C the result is shifted right by cnt just before returning -- confirm against
C the callers.
C
C Throughout, the pair r9:r11 (high:low) is the running two-word accumulator.
C Only volatile registers are used and no stack frame is created.

PROLOGUE(mpn_mod_1_1p)
	sldi	r10, r4, 3		C r10 = 8*n, operand size in bytes
	addi	r4, r4, -1		C r4 = n-1
	add	r3, r3, r10		C r3 = address just past the top limb
	ld	r0, 16(r6)		C B1modb
	ld	r12, 24(r6)		C B2modb
	ld	r9, -8(r3)		C r9 = ap[n-1]
	ld	r10, -16(r3)		C r10 = ap[n-2]
	mtctr	r4			C ctr = n-1; bdz below leaves n-2 loop trips
	mulhdu	r8, r9, r0
	mulld	r7, r9, r0		C r8:r7 = ap[n-1] * B1modb
	addc	r11, r7, r10
	addze	r9, r8			C r9:r11 = ap[n-1] * B1modb + ap[n-2]
	bdz	L(end)			C n = 2: no limbs left to fold in

	ALIGN(16)
L(top):	ld	r4, -24(r3)		C r4 = next lower limb
	addi	r3, r3, -8		C step the pointer down one limb
	nop
	mulld	r10, r11, r0
	mulld	r8, r9, r12
	mulhdu	r11, r11, r0		C r11:r10 = old low word * B1modb
	mulhdu	r9, r9, r12		C r9:r8 = old high word * B2modb
	addc	r7, r10, r4
	addze	r10, r11		C r10:r7 = old low word * B1modb + limb
	addc	r11, r8, r7
	adde	r9, r9, r10		C r9:r11 = both products plus the limb
	bdnz	L(top)

C Final reduction of r9:r11.
C r0 = cnt.  The table holds cnt as a doubleword at 8(r6); the byte offset of
C its low word depends on the limb endianness, hence the two variants.
L(end):
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	lwz	r0, 8(r6)',
`	lwz	r0, 12(r6)')
	ld	r3, 0(r6)		C r3 = inv
	cmpdi	cr7, r0, 0
	beq-	cr7, L(4)		C cnt = 0: skip the high word shift
	subfic	r10, r0, 64		C r10 = 64 - cnt
	sld	r9, r9, r0
	srd	r10, r11, r10
	or	r9, r10, r9		C r9 = (r9 << cnt) | (r11 >> (64 - cnt))
L(4):	subfc	r10, r5, r9		C CA = 1 exactly when r9 >= d (unsigned)
	subfe	r10, r10, r10		C r10 = 0 if r9 >= d, else -1
	nand	r10, r10, r10		C complement: r10 = -1 if r9 >= d, else 0
	sld	r11, r11, r0		C r11 = low word << cnt
	and	r10, r10, r5		C r10 = d or 0
	subf	r9, r10, r9		C r9 -= d when r9 >= d
	mulhdu	r10, r9, r3
	mulld	r3, r9, r3		C r10:r3 = r9 * inv
	addi	r9, r9, 1
	addc	r8, r3, r11
	adde	r3, r10, r9		C r3:r8 = r10:r3 + r9:r11, r9 already incremented
	mulld	r3, r3, r5
	subf	r3, r3, r11		C r3 = r11 - r3 * d, candidate remainder
	cmpld	cr7, r8, r3
	bge	cr7, L(5)		C FIXME: Make branch-less
	add	r3, r3, r5		C r3 > r8 (unsigned): add d back
L(5):	cmpld	cr7, r3, r5
	bge-	cr7, L(10)		C r3 >= d: one more subtraction needed
	srd	r3, r3, r0		C shift the result right by cnt
	blr

L(10):	subf	r3, r5, r3		C r3 -= d
	srd	r3, r3, r0		C shift the result right by cnt
	blr
EPILOGUE()

C mpn_mod_1_1p_cps
C
C On entry r3 points to the four-doubleword table to fill and r4 holds the
C divisor.  With cnt = number of leading zero bits of the divisor and
C inv = value returned by mpn_invert_limb for (divisor << cnt), it stores:
C    0(table) = inv
C    8(table) = cnt
C   16(table) = B1modb >> cnt
C   24(table) = B2modb >> cnt
C where B1modb and B2modb are the unshifted values computed below.
C
C Not a leaf: LR is saved at 16(r1), r29-r31 are saved below the incoming
C stack pointer, and a 144-byte frame is open for the duration of the call.

PROLOGUE(mpn_mod_1_1p_cps,toc)
	mflr	r0
	std	r29, -24(r1)
	std	r30, -16(r1)
	std	r31, -8(r1)		C save r29-r31 before the frame is opened
	cntlzd	r31, r4			C r31 = cnt
	std	r0, 16(r1)		C save the return address
	extsw	r31, r31
	mr	r29, r3			C r29 = table pointer, live across the call
	stdu	r1, -144(r1)		C open the stack frame
	sld	r30, r4, r31		C r30 = divisor << cnt
	mr	r3, r30			C argument for mpn_invert_limb
	CALL(	mpn_invert_limb)
	cmpdi	cr7, r31, 0		C from here on r3 = inv, the call result
	neg	r0, r30			C r0 = -(divisor << cnt); B1modb if cnt = 0
	beq-	cr7, L(13)
	subfic	r11, r31, 64		C r11 = 64 - cnt
	li	r0, 1
	neg	r9, r30			C r9 = -(divisor << cnt)
	srd	r11, r3, r11		C r11 = inv >> (64 - cnt)
	sld	r0, r0, r31		C r0 = 1 << cnt
	or	r0, r11, r0
	mulld	r0, r0, r9		C B1modb = r9 * (r11 | (1 << cnt))
L(13):	mulhdu	r9, r0, r3
	mulld	r11, r0, r3		C r9:r11 = B1modb * inv
	add	r9, r0, r9
	nor	r9, r9, r9		C r9 = ~(B1modb + high word of the product)
	mulld	r9, r9, r30		C r9 = candidate B2modb
	cmpld	cr7, r11, r9
	bge	cr7, L(14)
	add	r9, r9, r30		C r9 > r11 (unsigned): add divisor << cnt
L(14):	addi	r1, r1, 144		C close the stack frame
	srd	r0, r0, r31
	std	r31, 8(r29)		C table word 1 = cnt
	std	r3, 0(r29)		C table word 0 = inv
	std	r0, 16(r29)		C table word 2 = B1modb >> cnt
	ld	r0, 16(r1)		C reload the return address
	srd	r9, r9, r31
	ld	r30, -16(r1)
	ld	r31, -8(r1)
	std	r9, 24(r29)		C table word 3 = B2modb >> cnt
	ld	r29, -24(r1)		C restored last, after its final use as pointer
	mtlr	r0
	blr
EPILOGUE()