github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/divrem_2.asm (about) 1 dnl PPC-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number. 2 3 dnl Copyright 2007, 2008 Free Software Foundation, Inc. 4 5 dnl This file is part of the GNU MP Library. 6 dnl 7 dnl The GNU MP Library is free software; you can redistribute it and/or modify 8 dnl it under the terms of either: 9 dnl 10 dnl * the GNU Lesser General Public License as published by the Free 11 dnl Software Foundation; either version 3 of the License, or (at your 12 dnl option) any later version. 13 dnl 14 dnl or 15 dnl 16 dnl * the GNU General Public License as published by the Free Software 17 dnl Foundation; either version 2 of the License, or (at your option) any 18 dnl later version. 19 dnl 20 dnl or both in parallel, as here. 21 dnl 22 dnl The GNU MP Library is distributed in the hope that it will be useful, but 23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 dnl for more details. 26 dnl 27 dnl You should have received copies of the GNU General Public License and the 28 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29 dnl see https://www.gnu.org/licenses/. 30 31 include(`../config.m4') 32 33 C cycles/limb 34 C norm frac 35 C POWER3/PPC630 36 C POWER4/PPC970 ? ? 37 C POWER5 37 ? 38 C POWER6 62 ? 39 C POWER6 30.5 ? 40 41 C INPUT PARAMETERS 42 C qp = r3 43 C fn = r4 44 C up = r5 45 C un = r6 46 C dp = r7 47 48 49 ifdef(`DARWIN',,` 50 define(`r2',`r31')') C FIXME! 
ASM_START()

EXTERN_FUNC(mpn_invert_limb)

C mpn_divrem_2 -- divide the un-limb number at up, extended by fn zero
C fraction limbs, by the 2-limb divisor at dp.
C
C In:   r3 = qp, r4 = fn, r5 = up, r6 = un, r7 = dp
C Out:  r3 = 0 or 1, the extra top quotient bit
C       quotient limbs are stored downwards starting at qp[fn+un-3]
C       the 2-limb remainder is stored through the dividend cursor r26
C
C Register roles once setup is done:
C   r23 = return value              r24 = quotient store cursor
C   r25 = fn                        r26 = dividend load cursor
C   r27 = current limb index        r28 = d0 = dp[0]
C   r29 = high remainder limb       r30 = d1 = dp[1]
C   r31 = low remainder limb        r3  = adjusted inverse (after the call)
C
C Idiom used below: subfe rX, r1, r1 yields CA-1, i.e. 0 when the preceding
C carrying op produced a carry and all ones when it did not.

PROLOGUE(mpn_divrem_2,toc)
C Save LR and r23-r31 relative to the incoming stack pointer, then allocate
C a 192-byte frame.
	mflr	r0
	std	r23, -72(r1)
	std	r24, -64(r1)
	std	r25, -56(r1)
	std	r26, -48(r1)
	std	r27, -40(r1)
	std	r28, -32(r1)
	std	r29, -24(r1)
	std	r30, -16(r1)
	std	r31, -8(r1)
	std	r0, 16(r1)
	stdu	r1, -192(r1)

C Move the arguments into registers that survive the call below.
	mr	r24, r3			C r24 = qp
	mr	r25, r4			C r25 = fn
	sldi	r0, r6, 3		C r0 = un * 8
	add	r26, r5, r0
	addi	r26, r26, -24		C r26 = &up[un-3]
	ld	r30, 8(r7)		C d1 = dp[1]
	ld	r28, 0(r7)		C d0 = dp[0]
	ld	r29, 16(r26)		C high limb = up[un-1]
	ld	r31, 8(r26)		C low limb  = up[un-2]

C If the top two dividend limbs are >= the divisor, subtract the divisor once
C and return 1 as the top quotient limb.  The 2-limb comparison is done
C without a second compare-and-branch: r0 becomes (high > d1) + (low >= d0),
C which is zero exactly when high == d1 and low < d0.
	li	r23, 0
	cmpld	cr7, r29, r30
	blt	cr7, L(8)		C high < d1: nothing to subtract
	mfcr	r0
	rlwinm	r0, r0, 30, 1		C r0 = GT bit of cr7 (4-operand mask form)
	subfc	r9, r28, r31		C CA = (low >= d0); r9 itself is unused
	addze.	r0, r0
	nop
	beq	cr0, L(8)
	subfc	r31, r28, r31		C (r29:r31) -= (d1:d0)
	subfe	r29, r30, r29
	li	r23, 1

L(8):
	add	r27, r25, r6
	addic.	r27, r27, -3		C r27 = fn + un - 3
	blt	cr0, L(18)		C no quotient limbs to produce

C Get the inverse for d1 from mpn_invert_limb, then correct it for d0.
C r11:r10 is a two-limb accumulator; the inverse in r3 is decremented once
C per pass of L(40) for as long as the accumulator high limb is non-negative.
	mr	r3, r30
	CALL(	mpn_invert_limb)
	mulld	r10, r3, r30		C r10 = low(inv * d1)
	mulhdu	r0, r3, r28		C r0  = high(inv * d0)
	addc	r8, r10, r28
	subfe	r11, r1, r1		C r11 = CA - 1
	addc	r10, r8, r0
	addze.	r11, r11
	blt	cr0, L(91)
L(40):
	subfc	r10, r30, r10		C accumulator -= d1
	addme.	r11, r11
	addi	r3, r3, -1
	bge	cr0, L(40)
L(91):
	addi	r5, r27, 1
	mtctr	r5			C loop count = fn + un - 2
	sldi	r0, r27, 3
	add	r24, r24, r0		C r24 = &qp[fn+un-3]
	ALIGN(16)

C Main loop: one quotient limb per iteration.
L(loop):
	mulhdu	r8, r29, r3
	mulld	r6, r29, r3
	addc	r6, r6, r31
	adde	r8, r8, r29		C (r8:r6) = high*inv + (high:low); r8 = q
	cmpd	cr7, r27, r25		C index < fn means a fraction limb
	mulld	r0, r30, r8		C r0 = low(d1 * q)
	mulhdu	r11, r28, r8		C (r11:r10) = d0 * q
	mulld	r10, r28, r8
	subf	r31, r0, r31		C low -= low(d1 * q)
	li	r7, 0			C fraction limbs read as zero
	blt	cr7, L(60)
	ld	r7, 0(r26)		C next dividend limb
	addi	r26, r26, -8
	nop
L(60):	subfc	r7, r28, r7		C subtract the divisor (d1:d0) once ...
	subfe	r31, r30, r31
	subfc	r7, r10, r7		C ... and then d0 * q
	subfe	r4, r11, r31
	subfc	r9, r6, r4		C CA = (r4 >= r6), unsigned
	subfe	r9, r1, r1		C r9 = 0 if r4 >= r6, else all ones
	andc	r6, r28, r9		C divisor limbs survive only when r9 = 0
	andc	r0, r30, r9
	addc	r31, r7, r6		C add the divisor back in that case
	adde	r29, r4, r0
	subf	r8, r9, r8		C q += 1 when r9 is all ones
	cmpld	cr7, r29, r30
	bge-	cr7, L(fix)		C hinted as unlikely
L(bck):	std	r8, 0(r24)		C store the quotient limb
	addi	r24, r24, -8
	addi	r27, r27, -1
	bdnz	L(loop)

C Store the remainder, set the return value and unwind the frame.
L(18):
	std	r31, 8(r26)
	std	r29, 16(r26)
	mr	r3, r23
	addi	r1, r1, 192
	ld	r0, 16(r1)
	mtlr	r0
	ld	r23, -72(r1)
	ld	r24, -64(r1)
	ld	r25, -56(r1)
	ld	r26, -48(r1)
	ld	r27, -40(r1)
	ld	r28, -32(r1)
	ld	r29, -24(r1)
	ld	r30, -16(r1)
	ld	r31, -8(r1)
	blr

C Out-of-line fixup, entered when the new high remainder limb is >= d1.
C Uses the same branch-free 2-limb comparison as the initial step; when the
C remainder is >= the divisor, subtract the divisor and bump the quotient.
L(fix):
	mfcr	r0
	rlwinm	r0, r0, 30, 1		C r0 = GT bit of cr7
	subfc	r9, r28, r31		C CA = (low >= d0)
	addze.	r0, r0
	beq	cr0, L(bck)		C remainder < divisor after all
	subfc	r31, r28, r31
	subfe	r29, r30, r29
	addi	r8, r8, 1
	b	L(bck)
EPILOGUE()