dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/aorslsh2_n.asm

dnl  Alpha mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).

dnl  Copyright 2003, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     6
C EV6:     3.75

C TODO
C  * Tune to reach 3.5 c/l on ev6 and 5.75 c/l on ev5.

C INPUT PARAMETERS
C Computes rp[] = up[] +- (vp[] << 2) limb by limb, 64 bits per limb.
define(`rp',`r16')	C where result limbs are stored
define(`up',`r17')	C limbs that are added to or subtracted from
define(`vp',`r18')	C limbs that are shifted left by two bits first
define(`n', `r19')	C number of limbs; presumably n >= 1 -- confirm with callers

C WORKING REGISTERS
C The loop alternates between an even stage (u0, v0, carry in cy1) and an
C odd stage (u1, v1, carry in cy0).
define(`u0', `r8')	C up limb for the even stage
define(`u1', `r1')	C up limb for the odd stage
define(`v0', `r4')	C vp limb for the even stage
define(`v1', `r5')	C vp limb for the odd stage

define(`cy0', `r0')	C carry out of an even stage, carry in of the next odd one
define(`cy1', `r20')	C carry out of an odd stage, carry in of the next even one
define(`cy',  `r22')	C scratch: carry from folding in the previous carry
define(`rr',  `r24')	C finished result limb
define(`ps',  `r25')	C partial result: u limb +- sl
define(`sl',  `r28')	C 4 * v limb + top two bits of the previous v limb

C r2 and r3 are used unnamed: each holds the top two bits (srl by 62) of the
C v limb of the previous stage, to be merged into the next sl.

C ADDSUB is the limb operation and CARRY(a,b,c) sets c to the carry or borrow
C it produced, where a is the result and b the value it was computed from:
C   add:  c = (a < b) unsigned, i.e. the sum wrapped around
C   sub:  c = (b < a) unsigned, i.e. the difference wrapped around
C func is the entry point name for the operation selected by OPERATION_*.
ifdef(`OPERATION_addlsh2_n',`
  define(ADDSUB,       addq)
  define(CARRY,       `cmpult $1,$2,$3')
  define(func, mpn_addlsh2_n)
')
ifdef(`OPERATION_sublsh2_n',`
  define(ADDSUB,       subq)
  define(CARRY,       `cmpult $2,$1,$3')
  define(func, mpn_sublsh2_n)
')

MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)

ASM_START()
PROLOGUE(func)
C Dispatch on n mod 4.  The loop below handles four limbs per pass and always
C leaves one already loaded limb for L(end), so each case enters the loop at
C the stage that makes the total come out to n.  The lda adjustments bias the
C pointers so that the fixed offsets of the entered stage hit limb 1 (loads)
C and limb 0 (store).
	and	n, 2, cy0		C cy0 = bit 1 of n, used only for dispatch
	blbs	n, L(bx1)		C odd n
L(bx0):	ldq	v1, 0(vp)		C even n: first limbs go to the odd stage
	ldq	u1, 0(up)
	bis	r31, r31, r2		C nothing shifted in below the first limb
	bne	cy0, L(b10)

L(b00):	lda	vp, 48(vp)		C n mod 4 = 0: enter at L(lo0)
	lda	up, -16(up)
	lda	rp, -8(rp)
	s4addq	v1, r31, sl		C L(lo0) sits after its s4addq, so do it here
	br	r31, L(lo0)		C cy0 is already zero: no carry in

L(b10):	lda	vp, 32(vp)		C n mod 4 = 2: enter at L(lo2)
	lda	rp, 8(rp)
	lda	cy0, 0(r31)		C cy0 held 2 from the dispatch; clear carry in
	br	r31, L(lo2)

L(bx1):	ldq	v0, 0(vp)		C odd n: first limbs go to the even stage
	ldq	u0, 0(up)
	lda	cy1, 0(r31)		C no carry in
	bis	r31, r31, r3		C nothing shifted in below the first limb
	nop				C NOTE(review): presumably scheduling padding
	beq	cy0, L(b01)

L(b11):	lda	vp, 40(vp)		C n mod 4 = 3: enter at L(lo3)
	lda	up, -24(up)
	lda	rp, 16(rp)
	br	r31, L(lo3)

L(b01):	lda	n, -4(n)		C n mod 4 = 1: enter at L(top)
	ble	n, L(end)		C single limb: only the tail is needed
	lda	vp, 24(vp)
	lda	up, -8(up)

C Main loop, four limbs per pass.  Each stage loads the u and v limbs that
C the following stage consumes, and each pointer advances by 32 bytes (four
C limbs) once per pass.
	ALIGN(16)
L(top):	s4addq	v0, r3, sl		C combined vlimb
	ldq	v1, -16(vp)
	ADDSUB	u0, sl, ps		C ulimb + (vlimb << 2)
	ldq	u1, 16(up)
	srl	v0, 62, r2		C high v bits
	ADDSUB	ps, cy1, rr		C consume carry from previous operation
	CARRY(	ps, u0, cy0)		C carry out #2
	stq	rr, 0(rp)
	CARRY(	rr, ps, cy)		C carry out #3
	lda	vp, 32(vp)		C bookkeeping
	addq	cy, cy0, cy0		C final carry out
	s4addq	v1, r2, sl
L(lo0):	ldq	v0, -40(vp)
	ADDSUB	u1, sl, ps
	ldq	u0, 24(up)
	srl	v1, 62, r3
	ADDSUB	ps, cy0, rr
	CARRY(	ps, u1, cy1)
	stq	rr, 8(rp)
	CARRY(	rr, ps, cy)
	lda	rp, 32(rp)		C bookkeeping
	addq	cy, cy1, cy1
L(lo3):	s4addq	v0, r3, sl
	ldq	v1, -32(vp)
	ADDSUB	u0, sl, ps
	ldq	u1, 32(up)
	srl	v0, 62, r2
	ADDSUB	ps, cy1, rr
	CARRY(	ps, u0, cy0)
	stq	rr, -16(rp)
	CARRY(	rr, ps, cy)
	lda	up, 32(up)		C bookkeeping
	addq	cy, cy0, cy0
L(lo2):	s4addq	v1, r2, sl
	ldq	v0, -24(vp)
	ADDSUB	u1, sl, ps
	ldq	u0, 8(up)
	srl	v1, 62, r3
	ADDSUB	ps, cy0, rr
	CARRY(	ps, u1, cy1)
	stq	rr, -8(rp)
	CARRY(	rr, ps, cy)
	lda	n, -4(n)		C bookkeeping
	addq	cy, cy1, cy1
	bgt	n, L(top)

C Tail: process the one limb already held in u0 and v0, without loading more.
L(end):	s4addq	v0, r3, sl
	ADDSUB	u0, sl, ps
	srl	v0, 62, r2		C two bits shifted out of the last v limb
	ADDSUB	ps, cy1, rr
	CARRY(	ps, u0, cy0)
	stq	rr, 0(rp)
	CARRY(	rr, ps, cy)
	addq	cy, cy0, cy0
	addq	cy0, r2, r0		C return carry or borrow plus shifted-out bits

	ret	r31,(r26),1
EPILOGUE()
ASM_END()