dnl  Origin: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/aorsmul_1.asm

dnl  AMD64 mpn_addmul_1 and mpn_submul_1.

dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 2.5
C AMD K10	 2.5
C AMD bd1	 5.0
C AMD bobcat	 6.17
C Intel P4	14.9
C Intel core2	 5.09
C Intel NHM	 4.9
C Intel SBR	 4.0
C Intel atom	21.3
C VIA nano	 5.0

C The loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.

C TODO
C  * The loop is great, but the prologue and epilogue code was quickly written.
C    Tune it!
C ---------------------------------------------------------------------------
C func (mpn_addmul_1 or mpn_submul_1, chosen by the OPERATION_* symbol)
C
C Multiplies the limbs read through up by the single limb vl and applies the
C product to the limbs at rp in place, using ADDSUB (add or sub).
C
C In:   rp       limb array that is read and updated in place
C       up       limb array that is only read
C       n_param  limb count
C       vl       multiplier limb
C Out:  %rax     high product limb plus the final carry (add) or borrow (sub)
C
C There is no check for a zero count: the first limb at up is loaded and
C multiplied unconditionally, before the count is examined.
C %rbx is saved and restored in both ABIs; %rsi and %rdi additionally under
C DOS64.
C ---------------------------------------------------------------------------

C Operand registers.  The names below are the STD64 argument registers; the
C trailing comment on each line names the DOS64 register for the same
C argument (see the IFDOS redefinitions further down).
define(`rp',      `%rdi')   C rcx
define(`up',      `%rsi')   C rdx
define(`n_param', `%rdx')   C r8
define(`vl',      `%rcx')   C r9

C Working copy of the limb count.  It is negated in the entry stubs and then
C used as the (negative, upward counting) index of the main loop.
define(`n',       `%r11')

C ADDSUB is the instruction that applies each product limb to memory at rp,
C and func is the name handed to PROLOGUE; both follow from whichever
C OPERATION_* symbol is defined.
ifdef(`OPERATION_addmul_1',`
      define(`ADDSUB',        `add')
      define(`func',  `mpn_addmul_1')
')
ifdef(`OPERATION_submul_1',`
      define(`ADDSUB',        `sub')
      define(`func',  `mpn_submul_1')
')

C NOTE(review): ABI_SUPPORT and MULFUNC_PROLOGUE are defined outside this
C file; presumably they declare the supported ABIs and the functions this
C file can produce -- confirm against the m4 definitions.
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

C DOS64 register remapping.  rp, vl and n are pointed at the DOS64 argument
C registers (rcx, r9, r8); up stays %rsi because the prologue copies %rdx
C into it.  The bare words r9 and r8 are redefined as well, so the scratch
C uses of %r9 and %r8 in the body should come out as %rdi and %r11 and not
C collide with vl and n.  NOTE(review): this relies on the doubled quotes
C keeping the vl and n definitions themselves from being rewritten -- confirm
C against the generated .s file.
IFDOS(`	define(`up', ``%rsi'')	') dnl
IFDOS(`	define(`rp', ``%rcx'')	') dnl
IFDOS(`	define(`vl', ``%r9'')	') dnl
IFDOS(`	define(`r9', ``rdi'')	') dnl
IFDOS(`	define(`n',  ``%r8'')	') dnl
IFDOS(`	define(`r8', ``r11'')	') dnl

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)

C DOS64 only: save %rsi and %rdi (callee-saved under the Microsoft x64
C convention) and copy the second argument from %rdx into %rsi, which is the
C register up names in the code below.
IFDOS(``push	%rsi		'')
IFDOS(``push	%rdi		'')
IFDOS(``mov	%rdx, %rsi	'')

	mov	(up), %rax		C read first u limb early
	push	%rbx
IFSTD(`	mov	n_param, %rbx	')	C move away n from rdx, mul uses it
IFDOS(`	mov	n, %rbx		')
	mul	vl			C rdx:rax = first u limb * vl
IFSTD(`	mov	%rbx, n		')	C STD64: n now holds the limb count

C Dispatch on the limb count mod 4 to choose the entry point into the 4-way
C unrolled loop.  Only the scratch copy in %rbx is masked; n keeps the full
C count.
	and	$3, R32(%rbx)
	jz	L(b0)			C count mod 4 == 0
	cmp	$2, R32(%rbx)
	jz	L(b2)			C count mod 4 == 2
	jg	L(b3)			C count mod 4 == 3, otherwise fall into b1

C count mod 4 == 1.  A count of exactly 1 needs no loop: apply the low
C product limb to (rp) and let L(ret) fold the carry into the high limb.
L(b1):	dec	n
	jne	L(gt1)
	ADDSUB	%rax, (rp)
	jmp	L(ret)

C Each entry stub below biases up and rp by the count so that the negated
C count in n can index both arrays while running up towards zero.  It then
C places the first product (rdx:rax) in the registers its loop entry point
C expects and zeroes the accumulator registers that entry point adds into.
L(gt1):	lea	8(up,n,8), up
	lea	-8(rp,n,8), rp
	neg	n
	xor	%r10, %r10
	xor	R32(%rbx), R32(%rbx)
	mov	%rax, %r9		C low product limb, applied at L(L1)
	mov	(up,n,8), %rax		C next u limb for the mul at L(L1)
	mov	%rdx, %r8		C high product limb
	jmp	L(L1)

L(b0):	lea	(up,n,8), up
	lea	-16(rp,n,8), rp
	neg	n
	xor	%r10, %r10
	mov	%rax, %r8		C low product limb, applied at L(L0)
	mov	%rdx, %rbx		C high product limb
	jmp	L(L0)

L(b3):	lea	-8(up,n,8), up
	lea	-24(rp,n,8), rp
	neg	n
	mov	%rax, %rbx		C low product limb, applied at L(L3)
	mov	%rdx, %r10		C high product limb
	jmp	L(L3)

L(b2):	lea	-16(up,n,8), up
	lea	-32(rp,n,8), rp
	neg	n
	xor	%r8, %r8
	xor	R32(%rbx), R32(%rbx)
	mov	%rax, %r10		C low product limb, applied after L(L2)
	mov	24(up,n,8), %rax	C next u limb for the mul at L(L2)
	mov	%rdx, %r9		C high product limb
	jmp	L(L2)

C Main loop, four limbs per pass.  The registers written as r8, r9, r10 and
C rbx rotate as a pipeline: each collects a product limb plus incoming carry
C until ADDSUB applies it to memory at rp.  The instruction order is the
C output of an optimization tool (see the file header); do not reorder.
	ALIGN(16)
L(top):	ADDSUB	%r10, (rp,n,8)
	adc	%rax, %r9
	mov	(up,n,8), %rax
	adc	%rdx, %r8
	mov	$0, R32(%r10)
L(L1):	mul	vl
	ADDSUB	%r9, 8(rp,n,8)
	adc	%rax, %r8
	adc	%rdx, %rbx
L(L0):	mov	8(up,n,8), %rax
	mul	vl
	ADDSUB	%r8, 16(rp,n,8)
	adc	%rax, %rbx
	adc	%rdx, %r10
C The zeroing below sits between ADDSUB and adc, so it uses mov, which
C leaves the carry flag intact.
L(L3):	mov	16(up,n,8), %rax
	mul	vl
	ADDSUB	%rbx, 24(rp,n,8)
	mov	$0, R32(%r8)		C zero
	mov	%r8, %rbx		C zero
	adc	%rax, %r10
	mov	24(up,n,8), %rax
	mov	%r8, %r9		C zero
	adc	%rdx, %r9
L(L2):	mul	vl
	add	$4, n			C advance the index by four limbs
	js	L(top)			C loop while the index is still negative

C Wind down: apply the last two pending limbs and form the return limb.
	ADDSUB	%r10, (rp,n,8)
	adc	%rax, %r9
	adc	%r8, %rdx
	ADDSUB	%r9, 8(rp,n,8)
L(ret):	adc	$0, %rdx		C fold the final carry or borrow into the high limb
	mov	%rdx, %rax		C return value

	pop	%rbx
IFDOS(``pop	%rdi		'')
IFDOS(``pop	%rsi		'')
	ret
EPILOGUE()