dnl  Alpha mpn_rshift -- Shift a number right.

dnl  Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     3.25
C EV6:     1.75

C  INPUT PARAMETERS
C  rp	r16
C  up	r17
C  n	r18
C  cnt	r19

C  OVERVIEW
C  Shifts the n-limb number at up right by cnt bits, stores the n result
C  limbs at rp, and returns in r0 the bits shifted out of the lowest limb
C  (up[0] shifted left by 64-cnt).  Processing runs from the lowest limb
C  upwards: each result limb is (up[i] >> cnt) | (up[i+1] << (64-cnt)), and
C  the top result limb is just the last source limb shifted right.
C
C  NOTE(review): the code reads up[0] unconditionally and forms 64-cnt from
C  the low bits of -cnt, so it presumably relies on the caller supplying
C  n >= 1 and 1 <= cnt <= 63 -- confirm against the GMP mpn_rshift contract.
C
C  REGISTER USE
C  r16		rp, advanced as limbs are stored
C  r17		up, advanced as limbs are consumed
C  r18		limbs still to be fetched beyond the current one
C  r19		cnt (right shift count)
C  r20		-cnt; the shift instructions use only the low 6 bits of the
C		count, so shifting left by r20 shifts left by 64-cnt
C  r28		(n-1) mod 4, trip count of the one-limb-at-a-time loop
C  r0		function result
C  r1-r4	source limbs in flight
C  r5-r8	left-shifted parts, then the combined result limbs
C  r21-r24	right-shifted parts; r24 also carries the pending low part
C		from one group of four limbs to the next
C  r26		return address


ASM_START()
PROLOGUE(mpn_rshift)
	ldq	r4,0(r17)	C load first limb
	subq	r31,r19,r20
	subq	r18,1,r18
	and	r18,4-1,r28	C number of limbs in first loop
	sll	r4,r20,r0	C compute function result

C  Skip the one-limb loop when n-1 is already a multiple of 4; otherwise
C  remove its trip count from r18 so that r18 becomes a multiple of 4.
	beq	r28,L(L0)
	subq	r18,r28,r18

C  One result limb per iteration: combine the current limb (r4) with the
C  next one (r3), store the result, then make the next limb current.
	ALIGN(8)
L(top0):
	ldq	r3,8(r17)
	addq	r16,8,r16
	srl	r4,r19,r5
	addq	r17,8,r17
	subq	r28,1,r28
	sll	r3,r20,r6
	bis	r3,r3,r4
	bis	r5,r6,r8
	stq	r8,-8(r16)
	bne	r28,L(top0)

C  r24 = low part of the current limb.  If nothing is left to fetch it is
C  the top result limb and is stored on its own at the end label.
L(L0):	srl	r4,r19,r24
	beq	r18,L(end)
C warm up phase 1
C  Fetch the first group of four limbs.
	ldq	r1,8(r17)
	subq	r18,4,r18
	ldq	r2,16(r17)
	ldq	r3,24(r17)
	ldq	r4,32(r17)
C warm up phase 2
C  Begin shifting the first group; if that was the only group, finish in
C  the end1 tail.  Otherwise fetch the second group while completing the
C  shifts of the first, and go to the end2 tail if no third group exists.
	sll	r1,r20,r7
	srl	r1,r19,r21
	sll	r2,r20,r8
	beq	r18,L(end1)
	ldq	r1,40(r17)
	srl	r2,r19,r22
	ldq	r2,48(r17)
	sll	r3,r20,r5
	bis	r7,r24,r7
	srl	r3,r19,r23
	bis	r8,r21,r8
	sll	r4,r20,r6
	ldq	r3,56(r17)
	srl	r4,r19,r24
	ldq	r4,64(r17)
	subq	r18,4,r18
	beq	r18,L(end2)
	ALIGN(16)
C main loop
C  Four limbs per iteration.  Each pass stores the four results of the
C  previous group, shifts the group loaded one pass earlier, and loads the
C  group needed by the next pass.  r16 and r17 advance by 32 bytes per pass.
C  NOTE(review): the two unop slots carry a commented-out load into r31,
C  presumably a disabled prefetch kept for instruction scheduling -- confirm.
L(top):	stq	r7,0(r16)
	bis	r5,r22,r5
	stq	r8,8(r16)
	bis	r6,r23,r6

	sll	r1,r20,r7
	subq	r18,4,r18
	srl	r1,r19,r21
	unop	C	ldq	r31,-96(r17)

	sll	r2,r20,r8
	ldq	r1,72(r17)
	srl	r2,r19,r22
	ldq	r2,80(r17)

	stq	r5,16(r16)
	bis	r7,r24,r7
	stq	r6,24(r16)
	bis	r8,r21,r8

	sll	r3,r20,r5
	unop	C	ldq	r31,-96(r17)
	srl	r3,r19,r23
	addq	r16,32,r16

	sll	r4,r20,r6
	ldq	r3,88(r17)
	srl	r4,r19,r24
	ldq	r4,96(r17)

	addq	r17,32,r17
	bne	r18,L(top)
C cool down phase 2/1
C  Two groups are still in flight: store the older group and finish the
C  shifts of the newer one.  No further loads are issued from here on.
L(end2):
	stq	r7,0(r16)
	bis	r5,r22,r5
	stq	r8,8(r16)
	bis	r6,r23,r6
	sll	r1,r20,r7
	srl	r1,r19,r21
	sll	r2,r20,r8
	srl	r2,r19,r22
	stq	r5,16(r16)
	bis	r7,r24,r7
	stq	r6,24(r16)
	bis	r8,r21,r8
	sll	r3,r20,r5
	srl	r3,r19,r23
	sll	r4,r20,r6
	srl	r4,r19,r24
C cool down phase 2/2
	stq	r7,32(r16)
	bis	r5,r22,r5
	stq	r8,40(r16)
	bis	r6,r23,r6
	stq	r5,48(r16)
	stq	r6,56(r16)
C cool down phase 2/3
C  r24 holds the last source limb shifted right: the top result limb.
	stq	r24,64(r16)
	ret	r31,(r26),1

C cool down phase 1/1
C  Only one group of four was loaded: finish its shifts, then store its
C  four combined results followed by the top result limb.
L(end1):
	srl	r2,r19,r22
	sll	r3,r20,r5
	bis	r7,r24,r7
	srl	r3,r19,r23
	bis	r8,r21,r8
	sll	r4,r20,r6
	srl	r4,r19,r24
C cool down phase 1/2
	stq	r7,0(r16)
	bis	r5,r22,r5
	stq	r8,8(r16)
	bis	r6,r23,r6
	stq	r5,16(r16)
	stq	r6,24(r16)
	stq	r24,32(r16)
	ret	r31,(r26),1

C  No limbs remain beyond the current one: store its low part and return.
L(end):	stq	r24,0(r16)
	ret	r31,(r26),1
EPILOGUE(mpn_rshift)
ASM_END()