dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/aorrlshC_n.asm

dnl  AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
dnl  AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]

dnl  Copyright 2009-2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


C	     cycles/limb
C AMD K8,K9	 2
C AMD K10	 2
C Intel P4	 ?
C Intel core2	 3
C Intel NHM	 2.75
C Intel SBR	 2.55
C Intel atom	 ?
C VIA nano	 ?
C ----------------------------------------------------------------------------
C func (rp, up, vp, n) -- combined add/reverse-subtract with a shifted operand
C
C   ADDSUB = add:   rp[] = up[] + (vp[] << LSH)
C   otherwise:      rp[] = (vp[] << LSH) - up[]
C
C Syntax: AT&T, preprocessed with m4.
C ABI:    STD64 and DOS64 are both declared; FUNC_ENTRY/FUNC_EXIT are relied
C         on for any argument remapping.
C
C The names func, LSH, RSH, ADDSUB and ADCSBB are not defined in this file and
C are expected to come from the file that includes it.  RSH is presumably
C 64-LSH, i.e. the count that leaves the bits shifted out of a limb in the low
C end of the register -- TODO confirm against the including file.
C
C Assumes n >= 1: the first limb of vp is loaded before n is examined.
C
C Register roles
C   rp, up, vp   advanced past the end of each operand; n is negated and used
C                as an index that counts up towards zero
C   rax          n mod 4 during dispatch, then the saved carry (0 or -1)
C                between blocks, finally the return value
C   r8  - r11    limbs loaded from vp, then their shifted-out high bits
C   r12 - r15    result limbs (saved and restored around the body)
C   r11          at L(end): bits shifted out of the most significant vp limb
C ----------------------------------------------------------------------------

C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`vp',	`%rdx')
define(`n',	`%rcx')

C M is 1 shifted left by LSH.  It is used as the lea scale factor, so that
C index*M is the limb shifted left by LSH and the base register supplies the
C bits carried in from the limb below.
define(M, eval(m4_lshift(1,LSH)))

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	FUNC_ENTRY(4)
	push	%r12
	push	%r13
	push	%r14
	push	%r15

C Start on limb 0 right away: r12 = low result limb before add/sub,
C r8 = bits shifted out of limb 0.
	mov	(vp), %r8
	lea	(,%r8,M), %r12
	shr	$RSH, %r8

	mov	R32(n), R32(%rax)
	lea	(rp,n,8), rp
	lea	(up,n,8), up
	lea	(vp,n,8), vp
	neg	n
C Mask the full 32-bit register, not just the low byte.  The b00 path reaches
C L(e00) without rewriting rax, and the add eax,eax there must produce a clear
C carry.  With a byte-sized mask, bit 31 of n would leak into the carry flag
C for operands of 2^31 limbs or more.
	and	$3, R32(%rax)
	je	L(b00)
	cmp	$2, R8(%rax)
	jc	L(b01)
	je	L(b10)

C n mod 4 = 3: handle three limbs ahead of the 4-way loop.
L(b11):	mov	8(vp,n,8), %r10
	lea	(%r8,%r10,M), %r14
	shr	$RSH, %r10
	mov	16(vp,n,8), %r11
	lea	(%r10,%r11,M), %r15
	shr	$RSH, %r11
	ADDSUB	(up,n,8), %r12
	ADCSBB	8(up,n,8), %r14
	ADCSBB	16(up,n,8), %r15
	sbb	R32(%rax), R32(%rax)		C save carry for next
	mov	%r12, (rp,n,8)
	mov	%r14, 8(rp,n,8)
	mov	%r15, 16(rp,n,8)
	add	$3, n
	js	L(top)
	jmp	L(end)

C n mod 4 = 1: handle one limb.  Its shifted-out bits move to r11, which is
C where both L(top) and L(end) expect them.
L(b01):	mov	%r8, %r11
	ADDSUB	(up,n,8), %r12
	sbb	R32(%rax), R32(%rax)		C save carry for next
	mov	%r12, (rp,n,8)
	add	$1, n
	js	L(top)
	jmp	L(end)

C n mod 4 = 2: handle two limbs.
L(b10):	mov	8(vp,n,8), %r11
	lea	(%r8,%r11,M), %r15
	shr	$RSH, %r11
	ADDSUB	(up,n,8), %r12
	ADCSBB	8(up,n,8), %r15
	sbb	R32(%rax), R32(%rax)		C save carry for next
	mov	%r12, (rp,n,8)
	mov	%r15, 8(rp,n,8)
	add	$2, n
	js	L(top)
	jmp	L(end)

C n mod 4 = 0: limb 0 is already in r12/r8, so load the next two limbs and
C enter the loop past its first lea/shr pair.  rax is zero here, which stands
C for "no carry in".
L(b00):	mov	8(vp,n,8), %r9
	mov	16(vp,n,8), %r10
	jmp	L(e00)

C Main loop, four limbs per pass.  On entry at L(top), r11 holds the bits
C shifted out of the previous limb and rax holds the saved carry (0 or -1).
	ALIGN(16)
L(top):	mov	16(vp,n,8), %r10
	mov	(vp,n,8), %r8
	mov	8(vp,n,8), %r9
	lea	(%r11,%r8,M), %r12
	shr	$RSH, %r8
L(e00):	lea	(%r8,%r9,M), %r13
	shr	$RSH, %r9
	mov	24(vp,n,8), %r11
	lea	(%r9,%r10,M), %r14
	shr	$RSH, %r10
	lea	(%r10,%r11,M), %r15
	shr	$RSH, %r11
	add	R32(%rax), R32(%rax)		C restore carry
	ADCSBB	(up,n,8), %r12
	ADCSBB	8(up,n,8), %r13
	ADCSBB	16(up,n,8), %r14
	ADCSBB	24(up,n,8), %r15
	mov	%r12, (rp,n,8)
	mov	%r13, 8(rp,n,8)
	mov	%r14, 16(rp,n,8)
	sbb	R32(%rax), R32(%rax)		C save carry for next
	mov	%r15, 24(rp,n,8)
	add	$4, n
	js	L(top)
L(end):

C Return value.  Here rax is 0 or -1 (the negated carry or borrow) and r11
C holds the bits shifted out of the top limb.
C   add variant:    rax = r11 + carry
C   other variant:  rax = r11 - borrow, sign-extended to 64 bits
ifelse(ADDSUB,add,`
	sub	R32(%r11), R32(%rax)
	neg	R32(%rax)
',`
	add	R32(%r11), R32(%rax)
	movslq	R32(%rax), %rax
')
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	FUNC_EXIT()
	ret
EPILOGUE()