github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/pentium4/aorslshC_n.asm

dnl  AMD64 mpn_addlshC_n, mpn_sublshC_n -- rp[] = up[] +- (vp[] << C), where
dnl  C is 1, 2, 3.  Optimized for Pentium 4.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

C	         cycles/limb
C AMD K8,K9	 3.8
C AMD K10	 3.8
C Intel P4	 5.8
C Intel core2	 4.75
C Intel corei	 4.75
C Intel atom	 ?
C VIA nano	 4.75
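
dnl  For reference, a minimal C sketch of the operation this file implements,
dnl  assuming GMP's mp_limb_t/mp_size_t types and GMP_NUMB_BITS from gmp.h.
dnl  The name refmpn_aorslshC_n and the `sub' flag are illustrative only, not
dnl  part of GMP's API: the real entry points (e.g. mpn_addlsh1_n) are
dnl  generated from this file by wrappers that fix LSH/RSH and ADDSUB.
dnl
dnl	#include <gmp.h>
dnl
dnl	/* rp[] = up[] +- (vp[] << C); returns the final carry/borrow, which
dnl	   includes the C bits shifted out of the top limb of vp[].  */
dnl	static mp_limb_t
dnl	refmpn_aorslshC_n (mp_limb_t *rp, const mp_limb_t *up,
dnl	                   const mp_limb_t *vp, mp_size_t n, int C, int sub)
dnl	{
dnl	  mp_limb_t cy = 0, hi = 0;
dnl	  for (mp_size_t i = 0; i < n; i++)
dnl	    {
dnl	      mp_limb_t sh = (vp[i] << C) | hi;     /* low part of vp[i] << C */
dnl	      hi = vp[i] >> (GMP_NUMB_BITS - C);    /* bits shifted out the top */
dnl	      mp_limb_t s, c1;
dnl	      if (sub)
dnl	        {                                   /* subtract with borrow */
dnl	          s = up[i] - sh;   c1 = s > up[i];
dnl	          rp[i] = s - cy;   cy = c1 + (rp[i] > s);
dnl	        }
dnl	      else
dnl	        {                                   /* add with carry */
dnl	          s = up[i] + sh;   c1 = s < up[i];
dnl	          rp[i] = s + cy;   cy = c1 + (rp[i] < s);
dnl	        }
dnl	    }
dnl	  return cy + hi;   /* cf. the final lea (%rbx,%rbp), R32(%rax) below */
dnl	}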

C INPUT PARAMETERS
define(`rp',`%rdi')
define(`up',`%rsi')
define(`vp',`%rdx')
define(`n', `%rcx')

define(M, eval(m4_lshift(1,LSH)))

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	FUNC_ENTRY(4)
	push	%rbx
	push	%r12
	push	%rbp

	mov	(vp), %r9
	shl	$LSH, %r9
	mov	4(vp), R32(%rbp)

	xor	R32(%rbx), R32(%rbx)

	mov	R32(n), R32(%rax)
	and	$3, R32(%rax)
	jne	L(n00)		C n = 0, 4, 8, ...

	mov	(up), %r8
	mov	8(up), %r10
	shr	$RSH, R32(%rbp)
	ADDSUB	%r9, %r8
	mov	8(vp), %r9
	lea	(%rbp,%r9,M), %r9
	setc	R8(%rax)
	mov	12(vp), R32(%rbp)
	lea	-16(rp), rp
	jmp	L(L00)

L(n00):	cmp	$2, R32(%rax)
	jnc	L(n01)		C n = 1, 5, 9, ...
	mov	(up), %r11
	lea	-8(rp), rp
	shr	$RSH, R32(%rbp)
	ADDSUB	%r9, %r11
	setc	R8(%rbx)
	dec	n
	jz	L(1)		C jump for n = 1
	mov	8(up), %r8
	mov	8(vp), %r9
	lea	(%rbp,%r9,M), %r9
	mov	12(vp), R32(%rbp)
	lea	8(up), up
	lea	8(vp), vp
	jmp	L(L01)

L(n01):	jne	L(n10)		C n = 2, 6, 10, ...
	mov	(up), %r12
	mov	8(up), %r11
	shr	$RSH, R32(%rbp)
	ADDSUB	%r9, %r12
	mov	8(vp), %r9
	lea	(%rbp,%r9,M), %r9
	setc	R8(%rax)
	mov	12(vp), R32(%rbp)
	lea	16(up), up
	lea	16(vp), vp
	jmp	L(L10)

L(n10):	mov	(up), %r10
	mov	8(up), %r12
	shr	$RSH, R32(%rbp)
	ADDSUB	%r9, %r10
	mov	8(vp), %r9
	lea	(%rbp,%r9,M), %r9
	setc	R8(%rbx)
	mov	12(vp), R32(%rbp)
	lea	-24(rp), rp
	lea	-8(up), up
	lea	-8(vp), vp
	jmp	L(L11)

L(c0):	mov	$1, R8(%rbx)
	jmp	L(rc0)
L(c1):	mov	$1, R8(%rax)
	jmp	L(rc1)
L(c2):	mov	$1, R8(%rbx)
	jmp	L(rc2)

	ALIGN(16)
L(top):	mov	(up), %r8	C not on critical path
	shr	$RSH, R32(%rbp)
	ADDSUB	%r9, %r11	C not on critical path
	mov	(vp), %r9
	lea	(%rbp,%r9,M), %r9
	setc	R8(%rbx)	C save carry out
	mov	4(vp), R32(%rbp)
	mov	%r12, (rp)
	ADDSUB	%rax, %r11	C apply previous carry out
	jc	L(c0)		C jump if ripple
L(rc0):
L(L01):	mov	8(up), %r10
	shr	$RSH, R32(%rbp)
	ADDSUB	%r9, %r8
	mov	8(vp), %r9
	lea	(%rbp,%r9,M), %r9
	setc	R8(%rax)
	mov	12(vp), R32(%rbp)
	mov	%r11, 8(rp)
	ADDSUB	%rbx, %r8
	jc	L(c1)
L(rc1):
L(L00):	mov	16(up), %r12
	shr	$RSH, R32(%rbp)
	ADDSUB	%r9, %r10
	mov	16(vp), %r9
	lea	(%rbp,%r9,M), %r9
	setc	R8(%rbx)
	mov	20(vp), R32(%rbp)
	mov	%r8, 16(rp)
	ADDSUB	%rax, %r10
	jc	L(c2)
L(rc2):
L(L11):	mov	24(up), %r11
	shr	$RSH, R32(%rbp)
	ADDSUB	%r9, %r12
	mov	24(vp), %r9
	lea	(%rbp,%r9,M), %r9
	lea	32(up), up
	lea	32(vp), vp
	setc	R8(%rax)
	mov	-4(vp), R32(%rbp)
	mov	%r10, 24(rp)
	ADDSUB	%rbx, %r12
	jc	L(c3)
L(rc3):	lea	32(rp), rp
L(L10):	sub	$4, n
	ja	L(top)

L(end):
	shr	$RSH, R32(%rbp)
	ADDSUB	%r9, %r11
	setc	R8(%rbx)
	mov	%r12, (rp)
	ADDSUB	%rax, %r11
	jnc	L(1)
	mov	$1, R8(%rbx)
L(1):	mov	%r11, 8(rp)
	lea	(%rbx,%rbp), R32(%rax)
	pop	%rbp
	pop	%r12
	pop	%rbx
	FUNC_EXIT()
	ret
L(c3):	mov	$1, R8(%rax)
	jmp	L(rc3)
EPILOGUE()
ASM_END()