dnl  PowerPC-64 mpn_rsh1add_n, mpn_rsh1sub_n

dnl  Origin: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/rsh1aors_n.asm

dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		   cycles/limb
C POWER3/PPC630		 ?
C POWER4/PPC970		 2.9
C POWER5		 ?
C POWER6		 3.5
C POWER7		 2.25

C func(rp, up, vp, n)
C
C Stores n 64-bit limbs at rp.  Limb i of the result is the multi-limb sum
C up[] + vp[] (rsh1add_n) or difference up[] - vp[] (rsh1sub_n) shifted right
C by one bit.  The carry out of the addition, or the borrow out of the
C subtraction, becomes the most significant bit of the top result limb.  The
C bit shifted out at the bottom is returned in r3.
C
C Limb 0 is loaded unconditionally, so n >= 1 is assumed.  No stack frame is
C created and no register is saved or restored by this code.

C Arguments, in the registers they arrive in.
define(`rp',	`r3')
define(`up',	`r4')
define(`vp',	`r5')
define(`n',	`r6')

C Exactly one of the two OPERATION_ symbols is expected to be defined by the
C build; it selects the carry-generating and carry-propagating instructions
C and the name of the function being assembled.  INITCY is defined here but
C is not referenced anywhere in this file.
ifdef(`OPERATION_rsh1add_n', `
  define(`ADDSUBC',	`addc')
  define(`ADDSUBE',	`adde')
  define(INITCY,	`addic	$1, r1, 0')
  define(`func',	mpn_rsh1add_n)')
ifdef(`OPERATION_rsh1sub_n', `
  define(`ADDSUBC',	`subfc')
  define(`ADDSUBE',	`subfe')
  define(INITCY,	`addic	$1, r1, -1')
  define(`func',	mpn_rsh1sub_n)')

C Working registers.
C   x0, x1  alternating sum/difference limbs, not yet shifted
C   s0, s1  alternating result limbs under construction
C   u0, v0  most recently loaded limbs from up and vp
C   r11     the bit shifted out of limb 0, moved to r3 before returning
define(`s0',	`r9')
define(`s1',	`r7')
define(`x0',	`r0')
define(`x1',	`r12')
define(`u0',	`r8')
define(`v0',	`r10')

MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)

ASM_START()
PROLOGUE(func)
	ld	u0, 0(up)
	ld	v0, 0(vp)

	cmpdi	cr6, n, 2		C cr6 is tested later to catch n <= 2

	addi	r0, n, 1
	srdi	r0, r0, 2
	mtctr	r0			C ctr = (n + 1) / 4 loop trips

	andi.	r0, n, 1
	bne	cr0, L(bx1)		C odd n

C Even n.  From here until the final store the CA bit carries the addition
C or subtraction chain.  Only ADDSUBC and ADDSUBE write CA; the loads, stores,
C shifts, rotates, addi, andi. and branches in between leave it alone.
L(bx0):	ADDSUBC	x1, v0, u0		C limb 0, starts the carry chain
	ld	u0, 8(up)
	ld	v0, 8(vp)
	ADDSUBE	x0, v0, u0		C limb 1
	ble	cr6, L(n2)		C n = 2
	ld	u0, 16(up)
	ld	v0, 16(vp)
	srdi	s0, x1, 1
	rldicl	r11, x1, 0, 63		C return value
	ADDSUBE	x1, v0, u0		C limb 2
	andi.	n, n, 2
	bne	cr0, L(b10)
C n mod 4 = 0: bias rp so that 24(rp) at L(lo0) addresses result limb 0.
L(b00):	addi	rp, rp, -24
	b	L(lo0)
C n mod 4 = 2: bias the pointers so that L(lo2) loads limb 3 from 8(up) and
C 8(vp), and stores result limb 0 through 8(rp).
L(b10):	addi	up, up, 16
	addi	vp, vp, 16
	addi	rp, rp, -8
	b	L(lo2)

	ALIGN(16)
C Odd n.
L(bx1):	ADDSUBC	x0, v0, u0		C limb 0, starts the carry chain
	ble	cr6, L(n1)		C n = 1
	ld	u0, 8(up)
	ld	v0, 8(vp)
	ADDSUBE	x1, v0, u0		C limb 1
	ld	u0, 16(up)
	ld	v0, 16(vp)
	srdi	s1, x0, 1
	rldicl	r11, x0, 0, 63		C return value
	ADDSUBE	x0, v0, u0		C limb 2
	andi.	n, n, 2
	bne	cr0, L(b11)
C n mod 4 = 1: bias the pointers so that L(lo1) loads limb 3 from 16(up) and
C 16(vp), and stores result limb 0 through 16(rp).
L(b01):	addi	up, up, 8
	addi	vp, vp, 8
	addi	rp, rp, -16
	b	L(lo1)
C n mod 4 = 3: three limbs are already summed.  Go straight to the wind-down
C code when ctr says there is no full loop trip left.
L(b11):	addi	up, up, 24
	addi	vp, vp, 24
	bdz	L(end)

	ALIGN(32)
C Main loop, four limbs per trip.  Each quarter does the same four things:
C load the next limb pair, start a new result limb as the newer sum shifted
C right by one, complete the previous result limb by inserting the low bit of
C the newer sum at its bit 63 and store it, then form the next sum with carry.
L(top):	ld	u0, 0(up)
	ld	v0, 0(vp)
	srdi	s0, x1, 1
	rldimi	s1, x1, 63, 0
	std	s1, 0(rp)
	ADDSUBE	x1, v0, u0
L(lo2):	ld	u0, 8(up)
	ld	v0, 8(vp)
	srdi	s1, x0, 1
	rldimi	s0, x0, 63, 0
	std	s0, 8(rp)
	ADDSUBE	x0, v0, u0
L(lo1):	ld	u0, 16(up)
	ld	v0, 16(vp)
	srdi	s0, x1, 1
	rldimi	s1, x1, 63, 0
	std	s1, 16(rp)
	ADDSUBE	x1, v0, u0
L(lo0):	ld	u0, 24(up)
	ld	v0, 24(vp)
	srdi	s1, x0, 1
	rldimi	s0, x0, 63, 0
	std	s0, 24(rp)
	ADDSUBE	x0, v0, u0
	addi	up, up, 32
	addi	vp, vp, 32
	addi	rp, rp, 32
	bdnz	L(top)

C Wind-down: x1 and x0 hold the last two sums and s1 holds a result limb
C still missing its top bit.  Three result limbs remain to be stored.
L(end):	srdi	s0, x1, 1
	rldimi	s1, x1, 63, 0
	std	s1, 0(rp)
L(cj2):	srdi	s1, x0, 1
	rldimi	s0, x0, 63, 0
	std	s0, 8(rp)
C The low bit of x1 becomes CA for adde, which is the carry out, and the
C complement of CA for subfe, which is the borrow out.  That bit is inserted
C as the top bit of the last result limb.
L(cj1):	ADDSUBE	x1, x1, x1		C pseudo-depends on x1
	rldimi	s1, x1, 63, 0
	std	s1, 16(rp)
	mr	r3, r11
	blr

C n = 1: the single result limb is the sum shifted right by one, with the
C carry or borrow inserted as its top bit.
L(n1):	srdi	s1, x0, 1
	rldicl	r11, x0, 0, 63		C return value
	ADDSUBE	x1, x1, x1		C pseudo-depends on x1
	rldimi	s1, x1, 63, 0
	std	s1, 0(rp)
	mr	r3, r11
	blr

C n = 2: bias rp so that the 8(rp) and 16(rp) stores of the shared tail land
C on result limbs 0 and 1.
L(n2):	addi	rp, rp, -8
	srdi	s0, x1, 1
	rldicl	r11, x1, 0, 63		C return value
	b	L(cj2)
EPILOGUE()