dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/aors_err1_n.asm

dnl  AMD64 mpn_add_err1_n, mpn_sub_err1_n

dnl  Contributed by David Harvey.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 2.75 (degenerates to 3 c/l for some alignments)
C AMD K10	 ?
C Intel P4	 ?
C Intel core2	 ?
C Intel corei	 ?
C Intel atom	 ?
C VIA nano	 ?
C ----------------------------------------------------------------------------
C func (rp, up, vp, ep, yp, n, cy)
C
C func is mpn_add_err1_n (ADCSBB = adc) or mpn_sub_err1_n (ADCSBB = sbb),
C depending on which OPERATION_* symbol is defined when this file is expanded.
C
C Syntax: AT&T operand order (source, destination), run through m4.
C Arguments arrive in the System V AMD64 register order; the seventh one (cy)
C is read from the stack at 8(%rsp) on entry.
C
C Effect, for i = 0 .. n-1, with the carry/borrow chained from limb to limb
C and seeded from bit 0 of cy:
C	rp[i] = up[i] ADCSBB vp[i]
C	if limb i produced a carry/borrow out, yp[n-1-i] is added into the
C	two-limb accumulator eh:el
C On exit ep[0] = el and ep[1] = eh, and %al holds the last carry/borrow.
C
C Notes:
C   * n = 0 is not handled: the n = 0 mod 4 entry jumps straight into the
C     4-limb loop.
C   * Only bit 0 of cy enters the carry chain.  The upper bits of %rax are
C     never rewritten, so cy is presumably expected to be 0 or 1.
C   * %rbx, %rbp and %r12-%r15 are saved on entry and restored before ret.
C ----------------------------------------------------------------------------

C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`vp',	`%rdx')
define(`ep',	`%rcx')
define(`yp',	`%r8')
define(`n',	`%r9')
define(`cy_param',	`8(%rsp)')

C WORKING REGISTERS
C   el, eh	low / high limb of the error accumulator
C   t0 - t3	per-limb yp candidates (zero when that limb gave no carry)
C   w0, w1	limbs in flight between up/vp and rp
define(`el',	`%rbx')
define(`eh',	`%rbp')
define(`t0',	`%r10')
define(`t1',	`%r11')
define(`t2',	`%r12')
define(`t3',	`%r13')
define(`w0',	`%r14')
define(`w1',	`%r15')

ifdef(`OPERATION_add_err1_n', `
	define(ADCSBB,	      adc)
	define(func,	      mpn_add_err1_n)')
ifdef(`OPERATION_sub_err1_n', `
	define(ADCSBB,	      sbb)
	define(func,	      mpn_sub_err1_n)')

MULFUNC_PROLOGUE(mpn_add_err1_n mpn_sub_err1_n)


ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	C cy_param is relative to the entry %rsp, so fetch it before any push.
	mov	cy_param, %rax

	C Callee-saved registers used below as el, eh, t2, t3, w0, w1.
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15

	C Point up/vp/rp one past their last limb; after n is negated the
	C index (ptr,n,8) walks forward and the loops stop when n reaches 0.
	lea	(up,n,8), up
	lea	(vp,n,8), vp
	lea	(rp,n,8), rp

	C Dispatch on n mod 4 so that the main loop always sees a multiple of
	C four limbs.  t0 is not live yet and serves as scratch.
	mov	R32(n), R32(t0)
	and	$3, R32(t0)
	jz	L(0mod4)
	cmp	$2, R32(t0)
	jb	L(1mod4)		C n mod 4 == 1
	je	L(2mod4)		C n mod 4 == 2
					C otherwise fall through: n mod 4 == 3

C ---- n mod 4 == 3: three lead-in limbs -------------------------------------
L(3mod4):
	xor	R32(el), R32(el)
	xor	R32(eh), R32(eh)
	xor	R32(t0), R32(t0)
	xor	R32(t1), R32(t1)
	lea	-24(yp,n,8), yp		C yp -> yp[n-3]
	neg	n

	shr	$1, %al			C CF = incoming carry (bit 0 of cy)
	mov	(up,n,8), w0
	mov	8(up,n,8), w1
	ADCSBB	(vp,n,8), w0
	mov	w0, (rp,n,8)
	cmovc	16(yp), el		C limb 0 carried: el = yp[n-1]
	ADCSBB	8(vp,n,8), w1
	mov	w1, 8(rp,n,8)
	cmovc	8(yp), t0		C limb 1 carried: t0 = yp[n-2]
	mov	16(up,n,8), w0
	ADCSBB	16(vp,n,8), w0
	mov	w0, 16(rp,n,8)
	cmovc	(yp), t1		C limb 2 carried: t1 = yp[n-3]
	setc	%al			C park the carry; the adds below clobber CF
	add	t0, el
	adc	$0, eh
	add	t1, el
	adc	$0, eh

	add	$3, n
	jnz	L(loop)			C more limbs remain
	jmp	L(end)

C ---- n mod 4 == 0: no lead-in ----------------------------------------------
	ALIGN(16)
L(0mod4):
	xor	R32(el), R32(el)
	xor	R32(eh), R32(eh)
	lea	(yp,n,8), yp		C yp -> one past yp[n-1]
	neg	n
	jmp	L(loop)			C unconditional: n = 0 is not checked

C ---- n mod 4 == 1: one lead-in limb ----------------------------------------
	ALIGN(16)
L(1mod4):
	xor	R32(el), R32(el)
	xor	R32(eh), R32(eh)
	lea	-8(yp,n,8), yp		C yp -> yp[n-1]
	neg	n

	shr	$1, %al			C CF = incoming carry (bit 0 of cy)
	mov	(up,n,8), w0
	ADCSBB	(vp,n,8), w0
	mov	w0, (rp,n,8)
	cmovc	(yp), el		C limb 0 carried: el = yp[n-1]
	setc	%al			C park the carry across add/jnz

	add	$1, n
	jnz	L(loop)
	jmp	L(end)

C ---- n mod 4 == 2: two lead-in limbs ---------------------------------------
	ALIGN(16)
L(2mod4):
	xor	R32(el), R32(el)
	xor	R32(eh), R32(eh)
	xor	R32(t0), R32(t0)
	lea	-16(yp,n,8), yp		C yp -> yp[n-2]
	neg	n

	shr	$1, %al			C CF = incoming carry (bit 0 of cy)
	mov	(up,n,8), w0
	mov	8(up,n,8), w1
	ADCSBB	(vp,n,8), w0
	mov	w0, (rp,n,8)
	cmovc	8(yp), el		C limb 0 carried: el = yp[n-1]
	ADCSBB	8(vp,n,8), w1
	mov	w1, 8(rp,n,8)
	cmovc	(yp), t0		C limb 1 carried: t0 = yp[n-2]
	setc	%al			C park the carry; the add below clobbers CF
	add	t0, el
	adc	$0, eh

	add	$2, n
	jnz	L(loop)
	jmp	L(end)

C ---- main loop: four limbs per iteration -----------------------------------
C On entry: n is negative and a multiple of 4, bit 0 of %al holds the carry,
C and yp points one past the next yp limb to be consumed (yp is walked
C downwards while up/vp/rp are walked upwards).
C Between shr and setc only mov, cmov and ADCSBB are used, none of which
C disturbs the carry chain.
	ALIGN(32)
L(loop):
	shr	$1, %al			C CF = carry from the previous group
	mov	-8(yp), t0		C load the yp limb for limb 0 ...
	mov	$0, R32(t3)		C t3 = 0 via mov: xor would clear CF
	mov	(up,n,8), w0
	mov	8(up,n,8), w1
	ADCSBB	(vp,n,8), w0
	cmovnc	t3, t0			C ... and drop it if limb 0 gave no carry
	ADCSBB	8(vp,n,8), w1
	mov	-16(yp), t1
	mov	w0, (rp,n,8)
	mov	16(up,n,8), w0
	mov	w1, 8(rp,n,8)
	cmovnc	t3, t1			C same for limb 1
	mov	-24(yp), t2
	ADCSBB	16(vp,n,8), w0
	cmovnc	t3, t2			C same for limb 2
	mov	24(up,n,8), w1
	ADCSBB	24(vp,n,8), w1
	cmovc	-32(yp), t3		C limb 3: t3 is still 0 unless it carried
	setc	%al			C park the carry; the adds below clobber CF
	add	t0, el			C fold the four selected limbs into eh:el
	adc	$0, eh
	add	t1, el
	adc	$0, eh
	add	t2, el
	adc	$0, eh
	mov	w0, 16(rp,n,8)
	add	t3, el
	lea	-32(yp), yp		C step yp back four limbs (lea keeps CF)
	adc	$0, eh
	mov	w1, 24(rp,n,8)
	add	$4, n
	jnz	L(loop)

C ---- store the error term and return ---------------------------------------
L(end):
	mov	el, (ep)		C ep[0] = low limb
	mov	eh, 8(ep)		C ep[1] = high limb

	C Pops mirror the pushes in the prologue.  %al still holds the carry
	C left by the last setc.
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
EPILOGUE()