dnl  Alpha mpn_mod_34lsub1.

dnl  Copyright 2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     4 (?)
C EV5:     2.67
C EV6:     1.67


dnl  PURPOSE
dnl
dnl  Return in r0 a value congruent, modulo 2^48-1, to the n-limb number
dnl  stored at up (64-bit limbs, least significant limb first).  The result
dnl  is a plain sum of partial terms; it is NOT reduced below 2^48-1.  With
dnl  n = 0 no limb is read and the result is 0.
dnl
dnl  METHOD
dnl
dnl  2^48 = 1 mod 2^48-1, so limb i has weight 2^(64*i mod 48), which cycles
dnl  through 1, 2^16, 2^32 with period three.  Limbs are summed three at a
dnl  time into a0, a1, a2, and the carries out of those 64-bit sums are
dnl  counted in c0, c1, c2.  A carry out of a0 has weight 2^64 = 2^16, out of
dnl  a1 weight 2^80 = 2^32, and out of a2 weight 2^96 = 1.  At the end the six
dnl  values are split at the bit position where their weight wraps past 2^48
dnl  and all the pieces are added together.
dnl
dnl  REGISTERS
dnl
dnl  Written: r0-r6 and r16-r25.  No stack is used and nothing is saved or
dnl  restored; the return is through r26.

dnl  INPUT PARAMETERS
dnl  up		r16
dnl  n		r17

dnl  l0-l2 hold the three limbs most recently loaded, a0-a2 are the running
dnl  sums and c0-c2 the carry counts for the corresponding sums.
define(`l0',`r18')
define(`l1',`r19')
define(`l2',`r20')
define(`a0',`r21')
define(`a1',`r22')
define(`a2',`r23')
define(`c0',`r24')
define(`c1',`r5')
define(`c2',`r6')

ASM_START()
PROLOGUE(mpn_mod_34lsub1)
	bis	r31, r31, c0		C carry counts start at zero
	bis	r31, r31, c1
	bis	r31, r31, c2

	lda	r17, -3(r17)		C r17 = n - 3
	bge	r17, $L_3_or_more
	bis	r31, r31, a0		C n < 3: no preload, sums start at zero
	bis	r31, r31, a1
	bis	r31, r31, a2
	br	r31, $L_012

C The first three limbs become the initial sums directly, no carry possible.
$L_3_or_more:
	ldq	a0, 0(r16)
	ldq	a1, 8(r16)
	ldq	a2, 16(r16)
	lda	r16, 24(r16)
	lda	r17, -3(r17)		C r17 = n - 6
	blt	r17, $L_012

C Prime the software pipeline: load one group of three limbs and start the
C a0 addition.  Its carry test, and the a1 and a2 additions, are completed
C either at the top of the loop or at L_end.
$L_6_or_more:
	ldq	l0, 0(r16)
	ldq	l1, 8(r16)
	ldq	l2, 16(r16)
	addq	l0, a0, a0

	lda	r16, 24(r16)
	lda	r17, -3(r17)
	blt	r17, $L_end

	ALIGN(16)
C Main loop
C On entry to each iteration l0, l1, l2 hold a loaded group, l0 has already
C been added into a0 but its carry has not yet been counted.  Each cmpult
C recovers the carry of the preceding addq as (sum < addend), and each ldq
C refills the limb register as soon as its old value is no longer needed.
$L_9_or_more:
$Loop:	cmpult	a0, l0, r0
	ldq	l0, 0(r16)
	addq	r0, c0, c0
	addq	l1, a1, a1
	cmpult	a1, l1, r0
	ldq	l1, 8(r16)
	addq	r0, c1, c1
	addq	l2, a2, a2
	cmpult	a2, l2, r0
	ldq	l2, 16(r16)
	addq	r0, c2, c2
	addq	l0, a0, a0
	lda	r16, 24(r16)
	lda	r17, -3(r17)
	bge	r17, $Loop

C Drain the pipeline: finish the group that is loaded but only partly added.
$L_end:	cmpult	a0, l0, r0
	addq	r0, c0, c0
	addq	l1, a1, a1
	cmpult	a1, l1, r0
	addq	r0, c1, c1
	addq	l2, a2, a2
	cmpult	a2, l2, r0
	addq	r0, c2, c2

C Handle the last (n mod 3) limbs
C On every path reaching here r17 = (limbs still unread) - 3, so after the
C adjustment below it is (limbs still unread) - 1: negative means none,
C zero means one, positive means two.
$L_012:	lda	r17, 2(r17)
	blt	r17, $L_0
	ldq	l0, 0(r16)
	addq	l0, a0, a0
	cmpult	a0, l0, r0
	addq	r0, c0, c0
	beq	r17, $L_0
	ldq	l1, 8(r16)
	addq	l1, a1, a1
	cmpult	a1, l1, r0
	addq	r0, c1, c1

C Align and sum our 3 main accumulators and 3 carry accumulators
C
C The terms added into r0 are
C	(a0 & 0xffffffffffff)    + (a0 >> 48)		weight 1
C	(a1 & 0xffffffff) << 16  + (a1 >> 32)		weight 2^16
C	(a2 & 0xffff) << 32      + (a2 >> 16)		weight 2^32
C	(c0 & 0xffffffff) << 16  + (c0 >> 32)		weight 2^16
C	(c1 & 0xffff) << 32      + (c1 >> 16)		weight 2^32
C	(c2 & 0xffffffffffff)				weight 1
C
C a1, a2 and c0 are reused as temporaries once their own value has been
C consumed.  When HAVE_LIMB_LITTLE_ENDIAN is defined the mask-and-shift is a
C single insll or inswl; otherwise it is done with zapnot followed by sll,
C using r25 as scratch.
$L_0:	srl	a0, 48, r2
	srl	a1, 32, r4
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	insll	a1, 2, r1',		C (a1 & 0xffffffff) << 16
`	zapnot	a1, 15, r25
	sll	r25, 16, r1')
	zapnot	a0, 63, r0		C a0 & 0xffffffffffff
	srl	a2, 16, a1
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	inswl	a2, 4, r3',		C (a2 & 0xffff) << 32
`	zapnot	a2, 3, r25
	sll	r25, 32, r3')
	addq	r1, r4, r1
	addq	r0, r2, r0
	srl	c0, 32, a2
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	insll	c0, 2, r4',		C (c0 & 0xffffffff) << 16
`	zapnot	c0, 15, r25
	sll	r25, 16, r4')
	addq	r0, r1, r0
	addq	r3, a1, r3
	addq	r0, r3, r0
	srl	c1, 16, c0
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	inswl	c1, 4, r2',		C (c1 & 0xffff) << 32
`	zapnot	c1, 3, r25
	sll	r25, 32, r2')
	addq	r4, a2, r4
C	srl	c2, 48, r3		C This will be 0 in practice
	zapnot	c2, 63, r1		C r1 = c2 & 0xffffffffffff
	addq	r0, r4, r0
	addq	r2, c0, r2
	addq	r0, r2, r0
C	addq	r1, r3, r1
	addq	r0, r1, r0

	ret	r31, (r26), 1
EPILOGUE(mpn_mod_34lsub1)
ASM_END()