dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~(up[] << cnt)

dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 2.25
C POWER6                 4

C TODO
C  * Micro-optimise header code
C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4236
C    bytes, 4-way code would become about 50% larger.

C FUNCTION
C  mpn_lshiftc shifts the n-limb operand at up left by cnt bits, writes the
C  bitwise complement of each shifted limb to rp, and returns the bits shifted
C  out of the most significant limb (not complemented) in retval.
C
C  The code assumes n >= 1 and 1 <= cnt <= 63.  Only SHIFT(1)..SHIFT(63) are
C  generated and the n = 0 case is not tested for, so callers must guarantee
C  both.
C
C REGISTER USAGE
C  r0        tnc = 64 - cnt
C  r3        rp_param on entry, retval from the first srd onwards
C  r4 r5 r6  up, n, cnt
C  r7        rp (working copy of rp_param, since r3 becomes retval)
C  r8 r9     source limbs
C  r10 r11   result limbs, also scratch while computing the dispatch address
C  r12       saved link register, restored before the final blr
C  ctr       n/2, iteration count for the 2-way unrolled loop
C
C DISPATCH
C  One specialised loop is generated per shift count.  Each SHIFT(N) expansion
C  is exactly 16 instructions = 64 bytes, so block N starts 64*(N-1) bytes
C  after block 1.  The header computes the entry point for the requested cnt
C  and branches to it through the link register:
C    even n  enters at L(eN) = L(e1) + 64*cnt - 64
C    odd n   enters at L(oN) = L(eN) - 24, L(oN) being 6 instructions earlier
C  Any change to the instruction count of SHIFT must be matched by the
C  constants 6 (sldi), -64 and -88 (addi) below.

C INPUT PARAMETERS
define(`rp_param', `r3')
define(`up',  `r4')
define(`n',   `r5')
define(`cnt', `r6')

define(`tnc',`r0')
define(`retval',`r3')
define(`rp',  `r7')

ASM_START()
PROLOGUE(mpn_lshiftc,toc)

ifdef(`HAVE_ABI_mode32',`
	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
')
	mflr	r12			C save return address, LR is reused for dispatch
	sldi	r8, n, 3		C r8 = n * 8, operand size in bytes
	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
	add	up, up, r8		C make up point at end of up[]
	add	r11, r11, r10		C r11 = L(e1) + 64*cnt, adjusted to L(eN)/L(oN) below
	srdi	r10, n, 1		C r10 = n / 2
	add	rp, rp_param, r8	C make rp point at end of rp[]
	subfic	tnc, cnt, 64		C tnc = 64 - cnt
	rlwinm.	r8, n, 0,31,31		C extract bit 0
	mtctr	r10			C loop count = n / 2
	beq	L(evn)			C bit 0 clear: n is even

L(odd):	ld	r9, -8(up)		C r9 = most significant limb
	cmpdi	cr0, n, 1		C n = 1?
	beq	L(1)
	ld	r8, -16(up)		C r8 = next limb down
	addi	r11, r11, -88		C L(o1) - L(e1) - 64
	mtlr	r11
	srd	r3, r9, tnc		C retval
	addi	up, up, 8		C bias up and rp for the fixed
	addi	rp, rp, -8		C offsets used from L(oN) onwards
	blr				C branch to L(oN)

L(evn):	ld	r8, -8(up)		C r8 = most significant limb
	ld	r9, -16(up)		C r9 = next limb down
	addi	r11, r11, -64		C step back one block to L(eN)
	mtlr	r11
	srd	r3, r8, tnc		C retval
	blr				C branch to L(eN)

C Single-limb operand: no loop needed.
L(1):	srd	r3, r9, tnc		C retval
	sld	r8, r9, cnt
	nor	r8, r8, r8		C complement after shifting
	std	r8, -8(rp)
	mtlr	r12			C restore return address
C In mode32 the 64-bit retval is split: high word in r3, low word in r4.
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3
	srdi	r3, r3, 32
')
	blr


C SHIFT(N) emits the 2-way unrolled loop specialised for a shift count of N.
C Entry points are L(oN) for odd n and L(eN) for even n, and L(loN) is the
C loop top.  The expansion must stay at exactly 16 instructions (64 bytes);
C the trailing nop is padding for that purpose.
define(SHIFT,`
L(lo$1):ld	r8, -24(up)
	nor	r11, r11, r11
	std	r11, -8(rp)
	addi	rp, rp, -16
L(o$1):	srdi	r10, r8, eval(64-$1)
	rldimi	r10, r9, $1, 0
	ld	r9, -32(up)
	addi	up, up, -16
	nor	r10, r10, r10
	std	r10, 0(rp)
L(e$1):	srdi	r11, r9, eval(64-$1)
	rldimi	r11, r8, $1, 0
	bdnz	L(lo$1)
	sldi	r10, r9, $1
	b	L(com)
	nop
')

	ALIGN(64)
forloop(`i',1,63,`SHIFT(i)')

C Common tail: complement and store the two least significant result limbs.
L(com):	nor	r11, r11, r11
	nor	r10, r10, r10
	std	r11, -8(rp)
	std	r10, -16(rp)
	mtlr	r12			C restore return address
C In mode32 the 64-bit retval is split: high word in r3, low word in r4.
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3
	srdi	r3, r3, 32
')
	blr
EPILOGUE()
ASM_END()