dnl  AMD64 mpn_mul_1 for CPUs with mulx.

dnl  Copyright 2012, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 -
C AMD K10	 -
C AMD bd1	 -
C AMD bd2	 ?
C AMD bobcat	 -
C AMD jaguar	 ?
C Intel P4	 -
C Intel PNR	 -
C Intel NHM	 -
C Intel SBR	 -
C Intel HWL	 ?
C Intel BWL	 ?
C Intel atom	 -
C VIA nano	 -

C Incoming argument registers.  The trailing comment on each define names a
C second register, presumably the DOS64 register for the same argument --
C TODO confirm.
define(`rp',      `%rdi')   C rcx
define(`up',      `%rsi')   C rdx
define(`n_param', `%rdx')   C r8
define(`v0_param',`%rcx')   C r9

C Working registers once the xchg in the entry sequence has swapped rcx and
C rdx: the limb count sits in rcx, which is what jrcxz tests, and the
C multiplier sits in rdx, which is the implicit source operand of mulx.
define(`n',       `%rcx')
define(`v0',      `%rdx')

C NOTE(review): IFDOS is defined outside this file; presumably the remaps
C below take effect only for DOS64 builds.  They rename r8 to r11 although the
C code further down also names r11 directly, and nothing visible in this file
C moves the DOS64 argument registers into place.  Confirm before relying on
C this file for DOS64.
IFDOS(`	define(`up', ``%rsi'')	') dnl
IFDOS(`	define(`rp', ``%rcx'')	') dnl
IFDOS(`	define(`v0', ``%r9'')	') dnl
IFDOS(`	define(`r9', ``rdi'')	') dnl
IFDOS(`	define(`n',  ``%r8'')	') dnl
IFDOS(`	define(`r8', ``r11'')	') dnl

ASM_START()
	TEXT
	ALIGN(16)
C mpn_mul_1c: enters the shared body without clearing r8, so whatever the
C caller placed in r8 is used as the carry-in limb.
PROLOGUE(mpn_mul_1c)
	jmp	L(ent)
EPILOGUE()
C mpn_mul_1: multiply the n limbs at up by v0 and store the n low result
C limbs at rp.  On return rax holds the most significant product limb with the
C final carry folded in.  rbx, r12 and r13 are saved and restored; r8-r11 and
C the argument registers are overwritten.
PROLOGUE(mpn_mul_1)
	xor	R32(%r8), R32(%r8)	C carry-in limb
L(ent):	mov	(up), %r9		C first source limb, loaded before up is rebased

	push	%rbx
	push	%r12
	push	%r13

C Rebase both pointers so they can be indexed with a negative limb count that
C runs up towards zero.  rp carries an extra bias of -32 bytes (four 8-byte
C limbs) to match the 0/8/16/24 store offsets used below.
	lea	(up,n_param,8), up
	lea	-32(rp,n_param,8), rp
	mov	R32(n_param), R32(%rax)	C copy of the count, only its low 2 bits are used
	xchg	v0_param, v0		C FIXME: is this insn fast?

	neg	n

C Dispatch on n mod 4 to pick the entry point into the 4-way unrolled loop.
	and	$3, R8(%rax)
	jz	L(b0)
	cmp	$2, R8(%rax)
	jz	L(b2)
	jg	L(b3)

C In the feed-in blocks below, n is advanced with sub of a negative constant
C rather than add: when the result is zero the subtraction produces no borrow,
C so CF is clear for the adc chain at the wd labels.  When the result is not
C zero, the explicit add of the carry-in limb sets CF for the loop instead.

C n mod 4 == 1
L(b1):	mov	%r8, %r12		C carry-in limb
	mulx	%r9, %rbx, %rax		C rbx = low, rax = high of up[0] * v0
	sub	$-1, n
	jz	L(wd1)			C n was 1: single limb, finish directly
	mulx	(up,n,8), %r9, %r8
	mulx	8(up,n,8), %r11, %r10
	add	%r12, %rbx		C fold in carry-in, starts the CF chain
	jmp	L(lo1)

C n mod 4 == 3; r8 still holds the carry-in limb here
L(b3):	mulx	%r9, %r11, %r10
	mulx	8(up,n,8), %r13, %r12
	mulx	16(up,n,8), %rbx, %rax
	sub	$-3, n
	jz	L(wd3)			C n was 3: wd3 adds r8 as the carry-in
	add	%r8, %r11		C fold in carry-in, starts the CF chain
	jmp	L(lo3)

C n mod 4 == 2
L(b2):	mov	%r8, %r10		C carry-in limb
	mulx	%r9, %r13, %r12
	mulx	8(up,n,8), %rbx, %rax
	sub	$-2, n
	jz	L(wd2)			C n was 2: wd2 adds r10 as the carry-in
	mulx	(up,n,8), %r9, %r8
	add	%r10, %r13		C fold in carry-in, starts the CF chain
	jmp	L(lo2)

C n mod 4 == 0; n is not adjusted here, lo0 advances it by 4
L(b0):	mov	%r8, %rax		C carry-in limb
	mulx	%r9, %r9, %r8
	mulx	8(up,n,8), %r11, %r10
	mulx	16(up,n,8), %r13, %r12
	add	%rax, %r9		C fold in carry-in, starts the CF chain
	jmp	L(lo0)

C Main loop, four limbs per pass.  Each stage issues the next mulx, adds the
C previous high half into a pending low half with adc, and stores a finished
C limb.  CF must survive from one adc to the next, so the loop uses only
C instructions that leave flags alone in between: mulx, mov, lea for the
C counter update, and jrcxz for the exit test.
L(top):	jrcxz	L(end)			C exit when n reaches 0, without touching CF
	adc	%r8, %r11
	mov	%r9, (rp,n,8)
L(lo3):	mulx	(up,n,8), %r9, %r8
	adc	%r10, %r13
	mov	%r11, 8(rp,n,8)
L(lo2):	mulx	8(up,n,8), %r11, %r10
	adc	%r12, %rbx
	mov	%r13, 16(rp,n,8)
L(lo1):	mulx	16(up,n,8), %r13, %r12
	adc	%rax, %r9
	mov	%rbx, 24(rp,n,8)
L(lo0):	mulx	24(up,n,8), %rbx, %rax
	lea	4(n), n			C n += 4 via lea so CF is preserved
	jmp	L(top)

C Wind-down: flush the limbs still held in registers.  n is zero on every path
C that reaches here, so the stores use rp directly.
L(end):	mov	%r9, (rp)
L(wd3):	adc	%r8, %r11
	mov	%r11, 8(rp)
L(wd2):	adc	%r10, %r13
	mov	%r13, 16(rp)
L(wd1):	adc	%r12, %rbx
	adc	n, %rax			C n == 0 here, so this adds only the final CF
	mov	%rbx, 24(rp)

	pop	%r13
	pop	%r12
	pop	%rbx
	ret
EPILOGUE()
ASM_END()