dnl  PowerPC-64 mpn_divexact_1 -- mpn by limb exact division.

dnl  Copyright 2006, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                       cycles/limb
C                       norm    unorm
C POWER3/PPC630        13-19
C POWER4/PPC970         16
C POWER5                16      16
C POWER6                37      46
C POWER7                12      12

C TODO
C  * Check if n=1 code is really an improvement.  It probably isn't.
C  * Make more similar to mode1o.asm.

C INPUT PARAMETERS
define(`rp', `r3')
define(`up', `r4')
define(`n',  `r5')
define(`d',  `r6')

C OVERVIEW
C  Reads n limbs from up, writes n quotient limbs to rp, dividing by the
C  single limb d.  Apart from the n = 1 shortcut no hardware division is
C  used: each quotient limb is the low half of a product with r7, the
C  inverse of the odd part of d modulo 2^64.
C  NOTE: n >= 1 and exact divisibility are assumed, as the function name
C  suggests -- neither is checked in this file.
C
C REGISTER ROLES (after the n = 1 shortcut)
C  r12  source limb most recently loaded
C  r10  number of trailing zero bits stripped from d, 0 when d is odd
C  r8   64 - r10, left-shift count used in L(loop0)
C  r7   inverse of the odd d modulo 2^64
C  r5   scratch once n has been copied to ctr, then the borrow limb
C  r0, r9, r11  scratch
C  ctr  n - 1, number of loop passes
C  cr0  set by the parity test of d and kept live until the path split
C  CA   borrow chain between consecutive subfe instructions


ASM_START()

EXTERN(binvert_limb_table)

PROLOGUE(mpn_divexact_1,toc)
	addic.	n, n, -1		C n - 1, sets cr0 and CA
	ld	r12, 0(up)		C first source limb
	bne	cr0, L(2)
	divdu	r0, r12, d		C n = 1: plain hardware divide
	std	r0, 0(rp)
	blr
L(2):
	rldicl.	r0, d, 0, 63		C r0 = d mod 2, cr0 records the parity
	li	r10, 0			C no shift unless d turns out even
	bne	cr0, L(7)		C d odd: skip the normalisation
	neg	r0, d
	and	r0, d, r0		C isolate lowest set bit of d
	cntlzd	r0, r0
	subfic	r0, r0, 63		C index of that bit = trailing zeros
	rldicl	r10, r0, 0, 32		C keep it as the shift count
	srd	d, d, r0		C d is odd from here on
L(7):
	mtctr	n			C n - 1 loop passes
	LEA(	r5, binvert_limb_table)
	rldicl	r11, d, 63, 57		C table index = bits 1..7 of d
	lbzx	r0, r5, r11		C byte seed for the inverse
C Three rounds of inv = 2*inv - d*inv*inv refine the seed into r7.
	mulld	r9, r0, r0
	sldi	r0, r0, 1
	mulld	r9, d, r9
	subf	r0, r9, r0
	mulld	r5, r0, r0
	sldi	r0, r0, 1
	mulld	r5, d, r5
	subf	r0, r5, r0
	mulld	r9, r0, r0
	sldi	r0, r0, 1
	mulld	r9, d, r9
	subf	r7, r9, r0		C r7 = 1/d mod 2^64

	bne	cr0, L(norm)		C cr0 still holds the parity of the original d
	subfic	r8, r10, 64		C set carry as side effect
	li	r5, 0			C no borrow limb yet
	srd	r11, r12, r10		C low part of first shifted limb

C Even d: every source limb is rebuilt from two neighbours shifted by
C r10 and r8 before the borrow is subtracted and the inverse applied.
	ALIGN(16)
L(loop0):
	ld	r12, 8(up)
	nop
	addi	up, up, 8
	sld	r0, r12, r8		C bits supplied by the next limb
	or	r11, r11, r0		C complete shifted source limb
	subfe	r9, r5, r11		C subtract borrow limb and CA borrow
	srd	r11, r12, r10		C low part for the next pass
	mulld	r0, r7, r9		C quotient limb
	mulhdu	r5, r0, d		C borrow limb for the next pass
	std	r0, 0(rp)
	addi	rp, rp, 8
	bdnz	L(loop0)

	subfe	r0, r5, r11		C top limb has no higher neighbour
	mulld	r0, r7, r0
	std	r0, 0(rp)
	blr

C Odd d: no shifting needed.  CA is expected to still be 1 from the
C addic. at entry, since this point is only reached with n - 1 != 0.
	ALIGN(16)
L(norm):
	mulld	r11, r12, r7		C first quotient limb
	mulhdu	r5, r11, d		C borrow limb for the next pass
	std	r11, 0(rp)
	ALIGN(16)
L(loop1):
	ld	r9, 8(up)
	addi	up, up, 8
	subfe	r5, r5, r9		C source limb minus borrow limb and CA borrow
	mulld	r11, r7, r5		C quotient limb
	mulhdu	r5, r11, d		C borrow limb for the next pass, unused after the last
	std	r11, 8(rp)
	addi	rp, rp, 8
	bdnz	L(loop1)
	blr
EPILOGUE()
ASM_END()