dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/add_n.asm

dnl  Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
dnl  store sum in a third limb vector.

dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     4.75
C EV6:     3

dnl  INPUT PARAMETERS
dnl  res_ptr	r16
dnl  s1_ptr	r17
dnl  s2_ptr	r18
dnl  size	r19

dnl  REGISTER ROLES (as used by the code below)
dnl  r0-r3	limbs loaded through s2_ptr (r18)
dnl  r4-r7	limbs loaded through s1_ptr (r17)
dnl  r28	raw sum of one s2 limb and one s1 limb, before carry-in
dnl  r8	carry out of the raw sum (set when the sum wrapped below an addend)
dnl  r25	running carry; also the carry out of adding the carry-in to r28
dnl  r20-r23	finished result limbs waiting to be stored through res_ptr (r16)
dnl  r31	used as a zero source operand
dnl  r0	on return, holds the final carry
dnl
dnl  CARRY SCHEME
dnl  Each limb is added in two steps: raw = a + b, then out = raw + carry_in.
dnl  Either step may wrap, and each wrap is detected with an unsigned compare
dnl  of the result against one of its inputs.  The two wrap flags can never
dnl  both be set for the same limb (if a + b wrapped, raw is at most 2^64-2,
dnl  so adding a carry of 0 or 1 cannot wrap again), which is why a plain OR
dnl  (bis) is enough to combine them into the next carry.

ASM_START()

C mpn_add_nc: entry point that starts from a caller-supplied carry instead of
C zero.  It copies r20 into the carry register r25 and branches into the body
C of mpn_add_n just past the instruction that would clear the carry.
C NOTE(review): r20 is presumably the carry-in argument that follows size
C (r19) in the argument registers; confirm against the C prototype.
PROLOGUE(mpn_add_nc)
	bis	r20,r31,r25		C cy = r20 (initial carry)
	br	L(com)			C join the common code in mpn_add_n
EPILOGUE()

C mpn_add_n: add the size-limb vectors at s1_ptr and s2_ptr, store the sum at
C res_ptr, and return the carry out of the most significant limb in r0.
C
C Structure:
C   1. If at least 4 limbs are available, run a 4-limb-per-iteration software
C      pipeline ($Loop), drained by $Lend1.
C   2. Handle the remaining 0-3 limbs one at a time ($Loop0 / $Lend0).
C The loop counter r19 is kept biased (size-4, then size-8) so that a single
C sign test tells whether another full group of 4 limbs is available.
PROLOGUE(mpn_add_n)
	bis	r31,r31,r25		C clear cy
L(com):	subq	r19,4,r19		C decr loop cnt
	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
C Start software pipeline for 1st loop
C Loads limbs 0-3 of the first group and completes the adds for limbs 0 and 1.
C s1_ptr is advanced before limbs 2 and 3 are fetched, hence the negative
C offsets -16 and -8 on r17 below.
	ldq	r0,0(r18)
	ldq	r4,0(r17)
	ldq	r1,8(r18)
	ldq	r5,8(r17)
	addq	r17,32,r17		C update s1_ptr
	addq	r0,r4,r28		C 1st main add
	ldq	r2,16(r18)
	addq	r25,r28,r20		C 1st carry add
	ldq	r3,24(r18)
	cmpult	r28,r4,r8		C compute cy from last add
	ldq	r6,-16(r17)
	cmpult	r20,r28,r25		C compute cy from last add
	ldq	r7,-8(r17)
	bis	r8,r25,r25		C combine cy from the two adds
	subq	r19,4,r19		C decr loop cnt
	addq	r1,r5,r28		C 2nd main add
	addq	r18,32,r18		C update s2_ptr
	addq	r28,r25,r21		C 2nd carry add
	cmpult	r28,r5,r8		C compute cy from last add
	blt	r19,$Lend1		C if less than 4 limbs remain, jump
C 1st loop handles groups of 4 limbs in a software pipeline
C State on entry to each iteration:
C   r20, r21	result limbs 0 and 1 of the current group, not yet stored
C   r8, r28	pending pieces of the carry out of limb 1
C   r2/r6, r3/r7	loaded operands for limbs 2 and 3 of the current group
C   r17, r18	already point past the current group, so the loads at offsets
C		0 and 8 fetch limbs 0 and 1 of the NEXT group
C   r16	still points at the current group
C Each iteration finishes limbs 2 and 3, stores all four results, and performs
C the same loads and limb 0/1 adds for the next group as the pipeline start.
C r20 and r21 are stored before they are overwritten by the next group.
	ALIGN(16)
$Loop:	cmpult	r21,r28,r25		C compute cy from last add
	ldq	r0,0(r18)
	bis	r8,r25,r25		C combine cy from the two adds
	ldq	r1,8(r18)
	addq	r2,r6,r28		C 3rd main add
	ldq	r4,0(r17)
	addq	r28,r25,r22		C 3rd carry add
	ldq	r5,8(r17)
	cmpult	r28,r6,r8		C compute cy from last add
	cmpult	r22,r28,r25		C compute cy from last add
	stq	r20,0(r16)
	bis	r8,r25,r25		C combine cy from the two adds
	stq	r21,8(r16)
	addq	r3,r7,r28		C 4th main add
	addq	r28,r25,r23		C 4th carry add
	cmpult	r28,r7,r8		C compute cy from last add
	cmpult	r23,r28,r25		C compute cy from last add
	addq	r17,32,r17		C update s1_ptr
	bis	r8,r25,r25		C combine cy from the two adds
	addq	r16,32,r16		C update res_ptr
	addq	r0,r4,r28		C 1st main add
	ldq	r2,16(r18)
	addq	r25,r28,r20		C 1st carry add
	ldq	r3,24(r18)
	cmpult	r28,r4,r8		C compute cy from last add
	ldq	r6,-16(r17)
	cmpult	r20,r28,r25		C compute cy from last add
	ldq	r7,-8(r17)
	bis	r8,r25,r25		C combine cy from the two adds
	subq	r19,4,r19		C decr loop cnt
	stq	r22,-16(r16)		C limb 2 of the group just finished
	addq	r1,r5,r28		C 2nd main add
	stq	r23,-8(r16)		C limb 3 of the group just finished
	addq	r25,r28,r21		C 2nd carry add
	addq	r18,32,r18		C update s2_ptr
	cmpult	r28,r5,r8		C compute cy from last add
	bge	r19,$Loop		C another full group of 4 is available
C Finish software pipeline for 1st loop
C Same as the first half of the loop body but without any loads: completes
C limbs 2 and 3 of the last full group and stores all four results.
$Lend1:	cmpult	r21,r28,r25		C compute cy from last add
	bis	r8,r25,r25		C combine cy from the two adds
	addq	r2,r6,r28		C 3rd main add
	addq	r28,r25,r22		C 3rd carry add
	cmpult	r28,r6,r8		C compute cy from last add
	cmpult	r22,r28,r25		C compute cy from last add
	stq	r20,0(r16)
	bis	r8,r25,r25		C combine cy from the two adds
	stq	r21,8(r16)
	addq	r3,r7,r28		C 4th main add
	addq	r28,r25,r23		C 4th carry add
	cmpult	r28,r7,r8		C compute cy from last add
	cmpult	r23,r28,r25		C compute cy from last add
	bis	r8,r25,r25		C combine cy from the two adds
	addq	r16,32,r16		C update res_ptr
	stq	r22,-16(r16)
	stq	r23,-8(r16)
C r19 is negative here (biased by -4); adding 4 back yields the number of
C limbs still to process, which is 0 to 3.
$Lend2:	addq	r19,4,r19		C restore loop cnt
	beq	r19,$Lret		C nothing left, return current cy
C Start software pipeline for 2nd loop
C Pre-load the first remaining limb pair; if it is also the last one, skip the
C loop so that no limb beyond the end of the operands is loaded.
	ldq	r0,0(r18)
	ldq	r4,0(r17)
	subq	r19,1,r19
	beq	r19,$Lend0
C 2nd loop handles remaining 1-3 limbs
C Each iteration adds the limb pair already in r0/r4 while loading the next
C pair, then stores one result limb.
	ALIGN(16)
$Loop0:	addq	r0,r4,r28		C main add
	ldq	r0,8(r18)
	cmpult	r28,r4,r8		C compute cy from last add
	ldq	r4,8(r17)
	addq	r28,r25,r20		C carry add
	addq	r18,8,r18
	addq	r17,8,r17
	stq	r20,0(r16)
	cmpult	r20,r28,r25		C compute cy from last add
	subq	r19,1,r19		C decr loop cnt
	bis	r8,r25,r25		C combine cy from the two adds
	addq	r16,8,r16
	bne	r19,$Loop0
C Final limb: operands are already in r0/r4, so no further loads are issued.
$Lend0:	addq	r0,r4,r28		C main add
	addq	r28,r25,r20		C carry add
	cmpult	r28,r4,r8		C compute cy from last add
	cmpult	r20,r28,r25		C compute cy from last add
	stq	r20,0(r16)
	bis	r8,r25,r25		C combine cy from the two adds

$Lret:	bis	r25,r31,r0		C return cy
	ret	r31,(r26),1		C return through the address in r26
EPILOGUE()
ASM_END()