dnl  Origin: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/bobcat/aorsmul_1.asm

dnl  AMD64 mpn_addmul_1 and mpn_submul_1 optimised for AMD bobcat.

dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 4.5
C AMD K10	 4.5
C AMD bd1	 4.75
C AMD bobcat	 5
C Intel P4	17.7
C Intel core2	 5.5
C Intel NHM	 5.43
C Intel SBR	 3.92
C Intel atom	23
C VIA nano	 5.63

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C Select the operation.  ADDSUB is the instruction applied to every rp limb
C and func is the name of the entry point that gets emitted.
ifdef(`OPERATION_addmul_1',`
      define(`ADDSUB',        `add')
      define(`func',  `mpn_addmul_1')
')
ifdef(`OPERATION_submul_1',`
      define(`ADDSUB',        `sub')
      define(`func',  `mpn_submul_1')
')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

C INPUT PARAMETERS, in argument order
C   rp       limb vector that is updated in place
C   up       limb vector that is multiplied
C   n_param  number of limbs to process
C   v0       single limb multiplier
C
C For every limb position the code forms the 128-bit product of one up limb
C and v0 with mul, applies ADDSUB of the low product limb to the matching rp
C limb, and folds the resulting carry or borrow together with the high product
C limb into the next position.  The limb left over after the last position is
C returned in %rax.
C
C NOTE(review): the first up limb is loaded before the count is examined, so
C the routine presumably requires a count of at least 1 -- confirm with callers.

C Standard parameters
define(`rp',              `%rdi')
define(`up',              `%rsi')
define(`n_param',         `%rdx')
define(`v0',              `%rcx')
C Standard allocations
C w0:w1 and w2:w3 alternate as the low:high halves of successive products.
define(`n',               `%rbx')
define(`w0',              `%r8')
define(`w1',              `%r9')
define(`w2',              `%r10')
define(`w3',              `%r11')

C DOS64 parameters
C up stays in %rsi here, the prologue copies the incoming %rdx argument there.
IFDOS(` define(`rp',      `%rcx')    ') dnl
IFDOS(` define(`up',      `%rsi')    ') dnl
IFDOS(` define(`n_param', `%r8')     ') dnl
IFDOS(` define(`v0',      `%r9')     ') dnl
C DOS64 allocations
C w1 moves to %rdi because %r9 holds v0 under this ABI.  w0 shares %r8 with
C n_param, which is dead before w0 is first written.
IFDOS(` define(`n',       `%rbx')    ') dnl
IFDOS(` define(`w0',      `%r8')     ') dnl
IFDOS(` define(`w1',      `%rdi')    ') dnl
IFDOS(` define(`w2',      `%r10')    ') dnl
IFDOS(` define(`w3',      `%r11')    ') dnl

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
C DOS64 only: %rsi and %rdi are callee-saved there and are both used below,
C so save them, then move the second argument from %rdx into up.
IFDOS(`	push	%rsi		')
IFDOS(`	push	%rdi		')
IFDOS(`	mov	%rdx, %rsi	')

	push	%rbx			C n lives in callee-saved %rbx
	mov	(up), %rax		C first source limb, ready for the first mul

C Bias both pointers so they address the second-to-last limb of their vector.
C The loop then indexes them with a negative n, and L(end) can reach the last
C two rp limbs at the fixed offsets 0 and 8.
	lea	-16(rp,n_param,8), rp
	lea	-16(up,n_param,8), up

C Keep the full count in n, mul overwrites %rdx which is n_param under STD64.
C The count modulo 4 picks the entry point into the 4-way unrolled loop.
	mov	n_param, n
	and	$3, R32(n_param)
	jz	L(b0)
	cmp	$2, R32(n_param)
	ja	L(b3)
	jz	L(b2)

C Count is 1 modulo 4.
L(b1):	mul	v0
	cmp	$1, n
	jz	L(n1)
	mov	%rax, w2
	mov	%rdx, w3
	neg	n
	add	$3, n
	jmp	L(L1)
C Exactly one limb: with the pointer bias, 8(rp) is the first rp limb.
L(n1):	ADDSUB	%rax, 8(rp)
	adc	$0, %rdx
	mov	%rdx, %rax
	pop	%rbx
IFDOS(`	pop	%rdi		')
IFDOS(`	pop	%rsi		')
	ret

C Count is 3 modulo 4.
L(b3):	mul	v0
	mov	%rax, w2
	mov	%rdx, w3
	neg	n
	inc	n
	jmp	L(L3)

C Count is 0 modulo 4.
L(b0):	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	neg	n
	add	$2, n
	jmp	L(L0)

C Count is 2 modulo 4.
L(b2):	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	neg	n
	jmp	L(L2)

C Main loop, four limbs per iteration.  Each stage multiplies the next up limb
C into one register pair while applying the previous pair to rp.  Every adc
C consumes the carry flag left by the ADDSUB or adc directly before it, so the
C instruction order inside a stage must not be changed.
	ALIGN(16)
L(top):	ADDSUB	w0, -16(rp,n,8)
	adc	w1, w2
	adc	$0, w3
L(L1):	mov	0(up,n,8), %rax
	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	ADDSUB	w2, -8(rp,n,8)
	adc	w3, w0
	adc	$0, w1
L(L0):	mov	8(up,n,8), %rax
	mul	v0
	mov	%rax, w2
	mov	%rdx, w3
	ADDSUB	w0, 0(rp,n,8)
	adc	w1, w2
	adc	$0, w3
L(L3):	mov	16(up,n,8), %rax
	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	ADDSUB	w2, 8(rp,n,8)
	adc	w3, w0
	adc	$0, w1
L(L2):	mov	24(up,n,8), %rax
	mul	v0
	mov	%rax, w2
	mov	%rdx, w3
	add	$4, n
	js	L(top)			C loop while n is still negative

C Wind down: apply the two pending products to the last two rp limbs and
C return the final carry-out limb.
L(end):	ADDSUB	w0, (rp)
	adc	w1, w2
	adc	$0, w3
	ADDSUB	w2, 8(rp)
	adc	$0, w3
	mov	w3, %rax

C Restore saved registers in reverse order of the prologue pushes.
	pop	%rbx
IFDOS(`	pop	%rdi		')
IFDOS(`	pop	%rsi		')
	ret
EPILOGUE()