dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/atom/aorrlsh2_n.asm
dnl
dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
dnl  Optimised for Intel Atom.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 ?
C AMD K10	 ?
C Intel P4	 ?
C Intel core2	 ?
C Intel NHM	 ?
C Intel SBR	 ?
C Intel atom	 5.75
C VIA nano	 ?
C INPUT PARAMETERS
C   rp   destination limb vector, n limbs are written
C   up   limb vector combined with the shifted vp, n limbs are read
C   vp   limb vector that is shifted left by LSH bits, n limbs are read
C   n    limb count.  The code assumes n >= 1: with n = 0 the dispatch below
C        takes the b0 path and still loads and stores limbs.
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`vp',	`%rdx')
define(`n',	`%rcx')

C LSH is the shift count and RSH = 64 - LSH is the complementary right shift
C that extracts the bits a limb spills into the next higher limb.
C M = 1 << LSH is used as the lea scale factor, so that
C   lea (a,b,M), dst   computes   dst = a + (b << LSH)
C without touching the flags.
define(`LSH', 2)
define(`RSH', 62)
define(M, eval(m4_lshift(1,LSH)))

C Select the operation.  ADCSBB is the carry-propagating instruction applied
C to each limb.  ADDSUB and func_nc are defined here but not used anywhere in
C this file.
ifdef(`OPERATION_addlsh2_n', `
  define(ADDSUB,	add)
  define(ADCSBB,	adc)
  define(func_n,	mpn_addlsh2_n)
  define(func_nc,	mpn_addlsh2_nc)')
ifdef(`OPERATION_rsblsh2_n', `
  define(ADDSUB,	sub)
  define(ADCSBB,	sbb)
  define(func_n,	mpn_rsblsh2_n)
  define(func_nc,	mpn_rsblsh2_nc)')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)

C ---------------------------------------------------------------------------
C func_n (rp, up, vp, n)
C
C   addlsh2:  rp[] = up[] + (vp[] << LSH)
C   rsblsh2:  rp[] = (vp[] << LSH) - up[]
C
C Return value in rax:
C   addlsh2:  (vp[n-1] >> RSH) + final carry
C   rsblsh2:  (vp[n-1] >> RSH) - final borrow
C
C Register roles:
C   r8 r9 r10 r11   vp limbs.  Each is first merged into a shifted result
C                   limb with lea, then reduced in place with shr to the
C                   RSH-shifted spill bits for the next limb.
C   rbx rbp r9 r10  shifted result limbs waiting for ADCSBB with up[].
C   rax             carry saved across loop iterations as 0 or -1.  It is
C                   written by sbb eax,eax and turned back into CF by
C                   add eax,eax.  It is zero on entry to every path.
C   rbx rbp         callee-saved, hence the push/pop pair.
C
C The main loop handles 4 limbs per iteration.  The code dispatches on
C n mod 4 and enters the loop at the matching store slot:
C   n mod 4 = 1  ->  b1  ->  L1   (or straight to cj1 when n = 1)
C   n mod 4 = 2  ->  b2  ->  top  (or straight to end when n = 2)
C   n mod 4 = 3  ->  b3  ->  L3
C   n mod 4 = 0  ->  b0  ->  L0
C Each entry path biases up and rp downwards so that the fixed displacement
C at its entry label addresses limb 0, and adjusts n to a multiple of 4.
C The last two limbs are always finished by the end/cj1 tail.
C
C NOTE(review): FUNC_ENTRY and FUNC_EXIT come from the included config and
C presumably handle the DOS64 argument mapping - confirm there.
C NOTE(review): no jump in this file targets the L2 label.
C ---------------------------------------------------------------------------

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func_n)
	FUNC_ENTRY(4)
	push	%rbx
	push	%rbp

	mov	R32(n), R32(%rax)
	and	$3, R32(%rax)
	jz	L(b0)			C we rely on rax = 0 at target
	cmp	$2, R32(%rax)
	mov	$0, R32(%rax)		C mov keeps the cmp flags and zeroes rax
	jz	L(b2)
	jg	L(b3)

C n mod 4 = 1.  The sub leaves CF clear and lea does not touch flags, so
C the n = 1 shortcut reaches cj1 with no carry-in.
L(b1):	mov	(vp), %r9
	lea	(%rax,%r9,M), %rbp
	shr	$RSH, %r9
	sub	$1, n
	lea	-8(up), up
	lea	-8(rp), rp
	jz	L(cj1)
	mov	8(vp), %r10
	lea	(%r9,%r10,M), %r9
	shr	$RSH, %r10
	mov	16(vp), %r11
	lea	24(vp), vp
	mov	(vp), %r8
	lea	(%r10,%r11,M), %r10
	shr	$RSH, %r11
	add	R32(%rax), R32(%rax)	C rax = 0, so this clears CF after the shr
	jmp	L(L1)

C n mod 4 = 2.  Both top and end rebuild CF from rax themselves.
L(b2):	lea	-32(rp), rp
	mov	(vp), %r8
	lea	-32(up), up
	lea	(%rax,%r8,M), %rbx
	shr	$RSH, %r8
	mov	8(vp), %r9
	sub	$2, n
	jle	L(end)
	jmp	L(top)

C n mod 4 = 3.  The add rounds n up to a multiple of 4 and is the last flag
C writer before L3, which therefore starts with CF clear unless n wraps.
L(b3):	lea	-24(up), up
	mov	(vp), %r11
	lea	-24(rp), rp
	mov	8(vp), %r8
	lea	(%rax,%r11,M), %r10
	shr	$RSH, %r11
	lea	8(vp), vp
	lea	(%r11,%r8,M), %rbx
	add	$1, n
	jmp	L(L3)

C n mod 4 = 0.
L(b0):	lea	-16(up), up
	mov	(vp), %r10
	lea	(%rax,%r10,M), %r9
	shr	$RSH, %r10
	mov	8(vp), %r11
	lea	-16(rp), rp
	mov	16(vp), %r8
	lea	(%r10,%r11,M), %r10
	shr	$RSH, %r11
	add	R32(%rax), R32(%rax)	C rax = 0, so this clears CF after the shr
	lea	16(vp), vp
	jmp	L(L0)

C Main loop.  The shifted limbs for this round are formed first, because shr
C clobbers the flags.  CF is then restored from rax and consumed by the
C ADCSBB chain.  Only mov and lea are placed inside the chain since they
C leave the flags alone.
	ALIGN(16)
L(top):	lea	(%r8,%r9,M), %rbp
	shr	$RSH, %r9
	lea	32(up), up
	mov	16(vp), %r10
	lea	(%r9,%r10,M), %r9
	shr	$RSH, %r10
	mov	24(vp), %r11
	lea	32(rp), rp
	lea	32(vp), vp
	mov	(vp), %r8
	lea	(%r10,%r11,M), %r10
	shr	$RSH, %r11
	add	R32(%rax), R32(%rax)	C restore CF from the saved 0 or -1
	ADCSBB	(up), %rbx
	mov	%rbx, (rp)
L(L1):	ADCSBB	8(up), %rbp
	mov	%rbp, 8(rp)
L(L0):	ADCSBB	16(up), %r9
	lea	(%r11,%r8,M), %rbx	C first limb of the next group of four
	mov	%r9, 16(rp)
L(L3):	ADCSBB	24(up), %r10
	sbb	R32(%rax), R32(%rax)	C save CF as 0 or -1 before shr and sub
L(L2):	shr	$RSH, %r8
	mov	8(vp), %r9
	mov	%r10, 24(rp)
	sub	$4, n
	jg	L(top)

C Tail: the last two limbs.  rbx already holds the first of them.  After the
C shr, r9 holds the spill bits of the most significant vp limb.
L(end):	lea	(%r8,%r9,M), %rbp
	shr	$RSH, %r9
	lea	32(up), up
	lea	32(rp), rp
	add	R32(%rax), R32(%rax)	C restore CF from the saved 0 or -1
	ADCSBB	(up), %rbx
	mov	%rbx, (rp)
L(cj1):	ADCSBB	8(up), %rbp
	mov	%rbp, 8(rp)

C Form the return value.  n is zero on every path that reaches this point,
C so it doubles as a zero source that can be read without changing CF.
ifdef(`OPERATION_addlsh2_n',`
	mov	R32(n), R32(%rax)	C zero rax
	adc	%r9, %rax')
ifdef(`OPERATION_rsblsh2_n',`
	sbb	n, %r9			C subtract 0
	mov	%r9, %rax')

	pop	%rbp
	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()