dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/bobcat/mul_1.asm

dnl  AMD64 mpn_mul_1 optimised for AMD bobcat.

dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 4.5
C AMD K10	 4.5
C AMD bd1	 4.62
C AMD bobcat	 5
C Intel P4	14
C Intel core2	 4.5
C Intel NHM	 4.23
C Intel SBR	 3.0
C Intel atom	21
C VIA nano	 4.94

C The loop of this code is the result of running a code generation and
C optimisation tool suite written by David Harvey and Torbjorn Granlund.
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C ---------------------------------------------------------------------------
C mpn_mul_1  (rp, up, n_param, v0)
C mpn_mul_1c (rp, up, n_param, v0, cy)
C
C Multiply the n_param limbs at up by the single limb v0, store the n_param
C low limbs of the result at rp and return the most significant limb in %rax.
C mpn_mul_1c additionally adds the limb cy into the least significant
C product limb; mpn_mul_1 is the same code entered with that limb zeroed.
C
C The first limb at up is loaded before n_param is examined, so n_param is
C assumed to be at least 1.
C
C Saved and restored here: %rbx always, plus %rsi and %rdi under DOS64.
C ---------------------------------------------------------------------------

C Standard parameters
define(`rp',      `%rdi')
define(`up',      `%rsi')
define(`n_param', `%rdx')
define(`v0',      `%rcx')
define(`cy',      `%r8')
C Standard allocations
C   n      limb index, negated so that it counts up towards zero in the loop
C   w0,w1  low and high half of one product
C   w2,w3  low and high half of the following product
define(`n',       `%rbx')
define(`w0',      `%r8')
define(`w1',      `%r9')
define(`w2',      `%r10')
define(`w3',      `%r11')

C DOS64 parameters
C   up arrives in %rdx and is copied to %rsi by the entry code below.
C   cy is read after the two pushes done at entry, hence the 56 byte offset
C   (8 return address + 32 shadow space + 16 for the pushes).
IFDOS(` define(`rp',      `%rcx')    ') dnl
IFDOS(` define(`up',      `%rsi')    ') dnl
IFDOS(` define(`n_param', `%r8')     ') dnl
IFDOS(` define(`v0',      `%r9')     ') dnl
IFDOS(` define(`cy',      `56(%rsp)')') dnl
C DOS64 allocations
C   w1 moves to %rdi because %r9 holds v0 under this ABI.
IFDOS(` define(`n',       `%rbx')    ') dnl
IFDOS(` define(`w0',      `%r8')     ') dnl
IFDOS(` define(`w1',      `%rdi')    ') dnl
IFDOS(` define(`w2',      `%r10')    ') dnl
IFDOS(` define(`w3',      `%r11')    ') dnl

ASM_START()
	TEXT
	ALIGN(16)
C Entry with carry-in: seed w2 with cy, then continue in the shared body.
PROLOGUE(mpn_mul_1c)
IFDOS(`	push	%rsi		')
IFDOS(`	push	%rdi		')
IFDOS(`	mov	%rdx, %rsi	')
	mov	cy, w2
	jmp	L(com)
EPILOGUE()

C Entry without carry-in: seed w2 with zero and fall into the shared body.
PROLOGUE(mpn_mul_1)
IFDOS(`	push	%rsi		')
IFDOS(`	push	%rdi		')
IFDOS(`	mov	%rdx, %rsi	')
	xor	w2, w2
L(com):	push	%rbx
	mov	(up), %rax		C first source limb, multiplied below

C Bias both pointers to 16 bytes before the end of their operands so the
C loop can address limbs with the negative index n.
	lea	-16(rp,n_param,8), rp
	lea	-16(up,n_param,8), up

C Dispatch on n_param mod 4 to the matching entry point of the 4-way
C unrolled loop.  n keeps the full count; n_param is reduced to the residue.
	mov	n_param, n
	and	$3, R32(n_param)
	jz	L(b0)
	cmp	$2, R32(n_param)
	ja	L(b3)
	jz	L(b2)

C Residue 1.  A count of exactly one limb is finished right here.
L(b1):	mul	v0
	cmp	$1, n
	jz	L(n1)
	neg	n
	add	$3, n
	add	%rax, w2		C carry is consumed by the adc at L1
	mov	%rdx, w3
	jmp	L(L1)
C Single limb case: store the low limb and return high limb plus carry.
C The two mov instructions leave the carry from the add intact for the adc.
L(n1):	add	%rax, w2
	mov	%rdx, %rax
	mov	w2, 8(rp)
	adc	$0, %rax
	pop	%rbx
IFDOS(`	pop	%rdi		')
IFDOS(`	pop	%rsi		')
	ret

C Residue 3.
L(b3):	mul	v0
	neg	n
	inc	n
	add	%rax, w2		C carry is consumed by the adc at L3
	mov	%rdx, w3
	jmp	L(L3)

C Residue 0.
L(b0):	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	neg	n
	add	$2, n
	add	w2, w0			C carry is consumed by the adc at L0
	jmp	L(L0)

C Residue 2.
L(b2):	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	neg	n
	add	w2, w0			C carry is consumed by the adc at L2
	jmp	L(L2)

C Main loop, four limbs per iteration.  Each stage multiplies one source
C limb, stores the previous low limb and adds the previous high half into
C the new low half.  The pairs w0,w1 and w2,w3 alternate between stages.
C The loop runs while n is still negative after being advanced by 4.
	ALIGN(16)
L(top):	mov	w0, -16(rp,n,8)
	add	w1, w2
L(L1):	adc	$0, w3
	mov	0(up,n,8), %rax
	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	mov	w2, -8(rp,n,8)
	add	w3, w0
L(L0):	adc	$0, w1
	mov	8(up,n,8), %rax
	mul	v0
	mov	%rax, w2
	mov	%rdx, w3
	mov	w0, 0(rp,n,8)
	add	w1, w2
L(L3):	adc	$0, w3
	mov	16(up,n,8), %rax
	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	mov	w2, 8(rp,n,8)
	add	w3, w0
L(L2):	adc	$0, w1
	mov	24(up,n,8), %rax
	mul	v0
	mov	%rax, w2
	mov	%rdx, w3
	add	$4, n
	js	L(top)

C Wind down: store the last two result limbs and return the final high
C half, including its carry, in %rax.
L(end):	mov	w0, (rp)
	add	w1, w2
	adc	$0, w3
	mov	w2, 8(rp)
	mov	w3, %rax

	pop	%rbx
IFDOS(`	pop	%rdi		')
IFDOS(`	pop	%rsi		')
	ret
EPILOGUE()