dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/mulx/aorsmul_1.asm (about)

dnl  AMD64 mpn_addmul_1 and mpn_submul_1 for CPUs with mulx.

dnl  Copyright 2012, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 -
C AMD K10	 -
C AMD bd1	 -
C AMD bd2	 ?
C AMD bobcat	 -
C AMD jaguar	 ?
C Intel P4	 -
C Intel PNR	 -
C Intel NHM	 -
C Intel SBR	 -
C Intel HWL	 ?
C Intel BWL	 ?
C Intel atom	 -
C VIA nano	 -

C Incoming argument registers.  The trailing column is kept from the original
C source; it presumably lists the DOS64 argument registers (confirm).
define(`rp',      `%rdi')   C rcx
define(`up',      `%rsi')   C rdx
define(`n_param', `%rdx')   C r8
define(`v0_param',`%rcx')   C r9

C Working names used once the prologue has swapped %rcx and %rdx: n is the
C negated limb index and v0 the multiplier.  mulx takes %rdx as its implicit
C source operand, so v0 has to live in %rdx.
define(`n',       `%rcx')
define(`v0',      `%rdx')

C Build-time selection: the same body is assembled either as mpn_addmul_1
C (add/adc) or as mpn_submul_1 (sub/sbb), depending on which OPERATION_
C symbol is defined.
ifdef(`OPERATION_addmul_1',`
      define(`ADDSUB',        `add')
      define(`ADCSBB',        `adc')
      define(`func',          `mpn_addmul_1')
')
ifdef(`OPERATION_submul_1',`
      define(`ADDSUB',        `sub')
      define(`ADCSBB',        `sbb')
      define(`func',          `mpn_submul_1')
')

C MULFUNC_PROLOGUE is defined outside this file; presumably it tells the
C build which entry points this file can provide (confirm).
MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

C Register renames that take effect only where IFDOS expands its argument
C (presumably DOS64 builds).
C NOTE(review): vl is not referenced anywhere else in this file, and no
C DOS-specific entry code (argument moves, rsi/rdi saves) is visible here.
C Confirm whether this path is actually built before relying on it.
IFDOS(` define(`up', ``%rsi'') ') dnl
IFDOS(` define(`rp', ``%rcx'') ') dnl
IFDOS(` define(`vl', ``%r9'') ') dnl
IFDOS(` define(`r9', ``rdi'') ') dnl
IFDOS(` define(`n', ``%r8'') ') dnl
IFDOS(` define(`r8', ``r11'') ') dnl

C func (rp, up, n_param, v0_param)
C
C For each of the n_param 8-byte limbs at up, form the 128-bit product with
C v0 and add it to (addmul) or subtract it from (submul) the limb with the
C same index at rp, propagating carries between limbs.  The final carry or
C borrow limb is returned in %rax.
C
C Register use:
C   %r9:%r8, %r11:%r10, %r13:%r12, %rbx:%rax   low:high halves from mulx
C   %r9, %r11, %r13, %rbx   after the adc chain, the four limbs applied to rp
C   %rax                    carry limb between groups and the return value
C   %rbx, %r12, %r13        saved on entry and restored before ret
C
C Both pointers are advanced to the end of their operands and n runs from
C minus the count up to zero, so each address is of the form disp(ptr,n,8).
C
C NOTE(review): there is no check for a zero count; presumably callers
C guarantee at least one limb.
ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	mov	(up), %r8		C first source limb, read before up moves

	push	%rbx
	push	%r12
	push	%r13

	lea	(up,n_param,8), up	C up = just past the last source limb
	lea	-32(rp,n_param,8), rp	C rp = 32 bytes before the end of rp, to
					C match the 0..24 displacements below
	mov	R32(n_param), R32(%rax)	C copy of the count for the mod 4 dispatch
C Swap %rcx and %rdx: multiplier into %rdx (v0), count into %rcx (n).
	xchg	v0_param, v0		C FIXME: is this insn fast?

	neg	n			C n = -count

	and	$3, R8(%rax)		C count mod 4 selects the entry point
	jz	L(b0)
	cmp	$2, R8(%rax)
	jz	L(b2)
	jg	L(b3)			C 3 goes to b3; falling through means 1

C count mod 4 == 1: one leading limb.
L(b1):	mulx	%r8, %rbx, %rax		C rax:rbx = first limb * v0
	sub	$-1, n			C n += 1; a zero result leaves CF clear
	jz	L(wd1)			C count was 1: only rbx is pending
	mulx	(up,n,8), %r9, %r8
	mulx	8(up,n,8), %r11, %r10
	test	R32(%rax), R32(%rax)	C clear cy
	jmp	L(lo1)

C count mod 4 == 0: begin with a full group of four limbs.
L(b0):	mulx	%r8, %r9, %r8
	mulx	8(up,n,8), %r11, %r10
	mulx	16(up,n,8), %r13, %r12
	xor	R32(%rax), R32(%rax)	C clears CF for the adc chain at lo0;
					C rax itself is overwritten by mulx there
	jmp	L(lo0)

C count mod 4 == 3: three leading limbs, combined here before the loop.
L(b3):	mulx	%r8, %r11, %r10
	mulx	8(up,n,8), %r13, %r12
	mulx	16(up,n,8), %rbx, %rax
	add	%r10, %r13		C fold each high half into the next low half
	adc	%r12, %rbx
	adc	$0, %rax
	sub	$-3, n			C n += 3; a zero result leaves CF clear
	jz	L(wd3)			C count was 3: r11, r13, rbx are pending
	test	R32(%rax), R32(%rax)	C clear cy
	jmp	L(lo3)

C count mod 4 == 2: two leading limbs, combined here before the loop.
L(b2):	mulx	%r8, %r13, %r12
	mulx	8(up,n,8), %rbx, %rax
	add	%r12, %rbx		C fold the first high half into the second low half
	adc	$0, %rax
	sub	$-2, n			C n += 2; a zero result leaves CF clear
	jz	L(wd2)			C count was 2: r13, rbx are pending
	mulx	(up,n,8), %r9, %r8
	test	R32(%rax), R32(%rax)	C clear cy
	jmp	L(lo2)

C Main loop, four limbs per pass.  At top, r9, r11, r13 and rbx hold the four
C limbs to apply to rp and rax holds the carry limb out of that group.  mulx
C leaves the flags alone, so the ADDSUB/ADCSBB carry chain on rp runs straight
C through the interleaved multiplies for the next group.
L(top):	ADDSUB	%r9, (rp,n,8)
L(lo3):	mulx	(up,n,8), %r9, %r8
	ADCSBB	%r11, 8(rp,n,8)
L(lo2):	mulx	8(up,n,8), %r11, %r10
	ADCSBB	%r13, 16(rp,n,8)
L(lo1):	mulx	16(up,n,8), %r13, %r12
	ADCSBB	%rbx, 24(rp,n,8)
	adc	%rax, %r9		C add previous carry limb plus the carry
					C or borrow left by the rp updates
L(lo0):	mulx	24(up,n,8), %rbx, %rax
	adc	%r8, %r11		C chain each high half into the next low half
	adc	%r10, %r13
	adc	%r12, %rbx
	adc	$0, %rax		C rax = carry limb
	add	$4, n
	js	L(top)			C loop while n is still negative

C Wind-down: apply the limbs still pending.  n is zero on every path that
C reaches these labels (jz taken above, or the loop counted up to zero), so
C the displacements address the last four limbs of rp.
L(end):	ADDSUB	%r9, (rp)
L(wd3):	ADCSBB	%r11, 8(rp)
L(wd2):	ADCSBB	%r13, 16(rp)
L(wd1):	ADCSBB	%rbx, 24(rp)
	adc	n, %rax			C n is zero: adds only the final carry or borrow
	pop	%r13
	pop	%r12
	pop	%rbx
	ret
EPILOGUE()
ASM_END()