dnl  AMD64 mpn_add_err2_n, mpn_sub_err2_n

dnl  Contributed by David Harvey.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 4.5
C AMD K10	 ?
C Intel P4	 ?
C Intel core2	 6.9
C Intel corei	 ?
C Intel atom	 ?
C VIA nano	 ?


C mpn_add_err2_n / mpn_sub_err2_n  (AT&T operand order: op src, dst)
C
C Operation, as implemented below:
C
C   {rp,n} = {up,n} ADCSBB {vp,n}, starting from the carry/borrow in bit 0
C   of cy.  For every limb position i (0 = least significant) whose
C   add/subtract produced a carry/borrow out, the limb yp1[n-1-i] is added
C   into a two-limb accumulator e1 and yp2[n-1-i] into a two-limb
C   accumulator e2.  Note the yp vectors are walked in the opposite
C   direction to rp/up/vp.
C
C   On exit ep[0] = e1 low, ep[1] = e1 high, ep[2] = e2 low, ep[3] = e2 high
C   and the return value in %rax is the final carry/borrow (0 or 1).
C
C Arguments arrive in the six integer argument registers followed by two
C stack slots (see the defines below).
C
C Precondition: n >= 1.  The loop body runs before the count is tested, so
C n = 0 would process limbs that do not exist.
C
C Callee-saved registers used as scratch (%rbx, %rbp, %r12-%r14) are pushed
C on entry and popped before returning.


C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`vp',	`%rdx')
define(`ep',	`%rcx')
define(`yp1',	`%r8')
define(`yp2',	`%r9')
define(`n_param',  `8(%rsp)')
define(`cy_param', `16(%rsp)')

C Carry state.  cy2 holds the incoming carry in bit 0 and, from then on, a
C 0 / all-ones mask.  It lives in %rax so that it can also become the
C return value.  cy1 is the mask for the first limb of each loop pair.
define(`cy1',	`%r14')
define(`cy2',	`%rax')

C Limb index.  It is negated after the pointers are advanced to the end of
C their vectors, so it counts up towards zero.
define(`n',	`%r10')

C w is the working limb; e1h:e1l and e2h:e2l are the two accumulators.
define(`w',	`%rbx')
define(`e1l',	`%rbp')
define(`e1h',	`%r11')
define(`e2l',	`%r12')
define(`e2h',	`%r13')


ifdef(`OPERATION_add_err2_n', `
	define(ADCSBB,	      adc)
	define(func,	      mpn_add_err2_n)')
ifdef(`OPERATION_sub_err2_n', `
	define(ADCSBB,	      sbb)
	define(func,	      mpn_sub_err2_n)')

MULFUNC_PROLOGUE(mpn_add_err2_n mpn_sub_err2_n)


ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	C Fetch the stack arguments first: their offsets are relative to the
	C entry value of %rsp, which the pushes below change.
	mov	cy_param, cy2
	mov	n_param, n

	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14

	C Clear both accumulators.  A 32-bit xor zeroes the full register.
	xor	R32(e1l), R32(e1l)
	xor	R32(e1h), R32(e1h)
	xor	R32(e2l), R32(e2l)
	xor	R32(e2h), R32(e2h)

	C Keep yp2 as an offset from yp1.  The yp2 limb is then addressed as
	C (yp1,yp2) and only yp1 has to be stepped in the loop.
	sub	yp1, yp2

	C Point rp/up/vp just past their last limb; n is negated below so
	C that (ptr,n,8) starts at limb 0.
	lea	(rp,n,8), rp
	lea	(up,n,8), up
	lea	(vp,n,8), vp

	test	$1, n
	jnz	L(odd)

	C Even n: yp1 -> yp1[n-1], the limb paired with limb 0 of up/vp.
	lea	-8(yp1,n,8), yp1
	neg	n
	jmp	L(top)

	ALIGN(16)
L(odd):
	C Odd n: handle limb 0 on its own so the loop can work in pairs.
	C yp1 -> yp1[n-2], so 8(yp1) is yp1[n-1] and the loop afterwards
	C finds its first limb at (yp1).
	lea	-16(yp1,n,8), yp1
	neg	n
	shr	$1, cy2			C incoming carry -> CF
	mov	(up,n,8), w
	ADCSBB	(vp,n,8), w
	C The accumulators are still zero here, so a conditional load gives
	C the same result as the mask-and-add used in the loop.
	cmovc	8(yp1), e1l
	cmovc	8(yp1,yp2), e2l
	mov	w, (rp,n,8)
	sbb	cy2, cy2		C carry -> 0 / all-ones mask
	inc	n
	jz	L(end)

	ALIGN(16)
L(top):
	mov     (up,n,8), w
	shr     $1, cy2         C restore carry
	ADCSBB  (vp,n,8), w
	mov     w, (rp,n,8)
	sbb     cy1, cy1        C generate mask, preserve CF

	C The mov instructions leave CF alone, so the second ADCSBB chains
	C directly from the first.
	mov     8(up,n,8), w
	ADCSBB  8(vp,n,8), w
	mov     w, 8(rp,n,8)
	sbb     cy2, cy2        C generate mask, preserve CF

	mov     (yp1), w	C (e1h:e1l) += cy1 * yp1 limb
	and     cy1, w
	add     w, e1l
	adc     $0, e1h

	C cy1 is overwritten with the masked limb here; it is regenerated
	C on the next iteration.
	and     (yp1,yp2), cy1	C (e2h:e2l) += cy1 * yp2 limb
	add     cy1, e2l
	adc     $0, e2h

	mov     -8(yp1), w	C (e1h:e1l) += cy2 * next yp1 limb
	and     cy2, w
	add     w, e1l
	adc     $0, e1h

	mov     -8(yp1,yp2), w	C (e2h:e2l) += cy2 * next yp2 limb
	and     cy2, w
	add     w, e2l
	adc     $0, e2h

	C lea does not modify flags, so jnz still tests the result of the add.
	add     $2, n
	lea     -16(yp1), yp1
	jnz     L(top)
L(end):

	mov	e1l, (ep)
	mov	e1h, 8(ep)
	mov	e2l, 16(ep)
	mov	e2h, 24(ep)

	and	$1, %eax	C return carry

	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
EPILOGUE()