dnl  AMD64 mpn_addmul_2 -- Multiply an n-limb vector with a 2-limb vector and
dnl  add the result to a third limb vector.

dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 2.375
C AMD K10	 2.375
C Intel P4	15-16
C Intel core2	 4.45
C Intel NHM	 4.32
C Intel SBR	 3.4
C Intel atom	 ?
C VIA nano	 4.4

C This code is the result of running a code generation and optimization tool
C suite written by David Harvey and Torbjorn Granlund.

C TODO
C  * Tune feed-in and wind-down code.
C ----------------------------------------------------------------------------
C mpn_addmul_2 (rp, up, n, vp)
C
C Multiply the n-limb vector at up by the two limbs at vp and accumulate the
C product into the vector at rp.  What the code below does with the result:
C   - rp[0] .. rp[n-1] are updated with add-to-memory instructions,
C   - rp[n] is overwritten with a plain store (its old value is never read),
C   - the most significant limb is returned in rax.
C
C Layout: one feed-in block per value of n mod 4 jumps into the middle of a
C 4-limb unrolled loop, and a wind-down after the loop finishes the last limb
C of up.  Every feed-in passes through loop code before the wind-down, so
C n = 1 is not handled here; callers presumably guarantee n >= 2.
C
C rbx and rbp are saved and restored; rax, rcx, rdx, rsi, rdi and r8-r11 are
C modified.  PROLOGUE, EPILOGUE, FUNC_ENTRY, FUNC_EXIT, ABI_SUPPORT,
C ASM_START, TEXT, ALIGN, R32 and L are not defined in this file; they are
C expected to come from the m4 support files pulled in by the include.
C ----------------------------------------------------------------------------

C INPUT PARAMETERS
define(`rp',      `%rdi')	C vector that is added to and written back
define(`up',      `%rsi')	C n-limb multiplicand
define(`n_param', `%rdx')	C limb count; rdx is overwritten by every mul
define(`vp',      `%rcx')	C 2-limb multiplier, read only at function entry

C WORKING REGISTERS
define(`v0', `%r8')		C vp[0]
define(`v1', `%r9')		C vp[1]
define(`w0', `%rbx')		C w0-w3 form a rotating window of partial sums;
define(`w1', `%rcx')		C w1 reuses the register of vp, which is dead
define(`w2', `%rbp')		C once v0 and v1 have been loaded
define(`w3', `%r10')
define(`n',  `%r11')		C negated, biased limb index used as loop counter

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_addmul_2)
	FUNC_ENTRY(4)
	mov	n_param, n		C copy n out of rdx before any mul runs
	push	%rbx
	push	%rbp

	mov	0(vp), v0
	mov	8(vp), v1

	mov	R32(n_param), R32(%rbx)	C rbx is scratch here, it becomes n mod 4
	mov	(up), %rax		C rax = up[0]
	lea	-8(up,n_param,8), up	C up and rp now address their limb n-1,
	lea	-8(rp,n_param,8), rp	C so negative indices walk the vectors
	mul	v0			C rdx:rax = up[0] * v0, shared by all feed-ins
	neg	n
	and	$3, R32(%rbx)
	jz	L(in0)			C n mod 4 = 0
	cmp	$2, R32(%rbx)
	jc	L(in1)			C n mod 4 = 1
	jz	L(in2)			C n mod 4 = 2
					C fall through for n mod 4 = 3

C Feed-in.  Each block files the two halves of up[0] * v0 into the registers
C its loop entry expects, clears the register that will collect the third
C limb, reloads up[0] for the v1 multiply, and biases n to a negative multiple
C of 4 so that the displacement used at the entry point addresses rp[0].

L(in3):
	mov	%rax, w1
	mov	%rdx, w2
	xor	R32(w3), R32(w3)
	mov	8(up,n,8), %rax		C up[0] again, n still holds -n_param here
	dec	n
	jmp	L(ent3)

L(in2):
	mov	%rax, w2
	mov	8(up,n,8), %rax		C up[0] again
	mov	%rdx, w3
	xor	R32(w0), R32(w0)
	add	$-2, n
	jmp	L(ent2)

L(in1):
	mov	%rax, w3
	mov	8(up,n,8), %rax		C up[0] again
	mov	%rdx, w0
	xor	R32(w1), R32(w1)	C the loop head that clears w1 is skipped
	inc	n
	jmp	L(ent1)

L(in0):
	mov	$0, R32(w3)		C the ent1 section that clears w3 is skipped
	mov	%rax, w0
	mov	8(up,n,8), %rax		C up[0] again
	mov	%rdx, w1
	xor	R32(w2), R32(w2)
	jmp	L(ent0)

C Main loop, four limbs of up per pass.  Each limb u is loaded twice because
C mul overwrites rax: once for u * v0 and once for u * v1.  After the v1
C product of a limb is added in, the lowest window register is complete; it
C is added into rp and the window roles rotate by one register:
C   (w3,w0,w1) -> (w0,w1,w2) -> (w1,w2,w3) -> (w2,w3,w0) -> (w3,w0,w1)

	ALIGN(32)
L(loop):
	mov	$0, R32(w1)
	mul	v0			C u * v0, u was preloaded into rax
	add	%rax, w3
	mov	(up,n,8), %rax		C same u again for the v1 product
	adc	%rdx, w0
	adc	$0, R32(w1)		C w1 = carry out
L(ent1):
	mul	v1
	add	w3, (rp,n,8)		C finished limb goes into rp
	mov	$0, R32(w3)		C mov, not xor: CF feeds the adc below
	adc	%rax, w0
	mov	$0, R32(w2)		C mov, not xor: CF feeds the adc below
	mov	8(up,n,8), %rax		C next u
	adc	%rdx, w1
	mul	v0
	add	%rax, w0
	mov	8(up,n,8), %rax		C same u again for the v1 product
	adc	%rdx, w1
	adc	$0, R32(w2)		C w2 = carry out
L(ent0):
	mul	v1
	add	w0, 8(rp,n,8)		C finished limb goes into rp
	adc	%rax, w1
	adc	%rdx, w2
	mov	16(up,n,8), %rax	C next u
	mul	v0
	add	%rax, w1
	adc	%rdx, w2
	adc	$0, R32(w3)		C w3 was zeroed earlier, now the carry out
	mov	16(up,n,8), %rax	C same u again for the v1 product
L(ent3):
	mul	v1
	add	w1, 16(rp,n,8)		C finished limb goes into rp
	adc	%rax, w2
	adc	%rdx, w3
	xor	R32(w0), R32(w0)	C no adc reads the flags before the next add
	mov	24(up,n,8), %rax	C next u
	mul	v0
	add	%rax, w2
	mov	24(up,n,8), %rax	C same u again for the v1 product
	adc	%rdx, w3
	adc	$0, R32(w0)		C w0 = carry out
L(ent2):
	mul	v1
	add	w2, 24(rp,n,8)		C finished limb goes into rp
	adc	%rax, w3
	adc	%rdx, w0
	mov	32(up,n,8), %rax	C preload u for the next pass or the wind-down
	add	$4, n
	js	L(loop)			C continue while the index is still negative

C Wind-down.  n is zero here, so up and rp address limb n-1 directly and rax
C already holds up[n-1].

L(tail):
	xor	R32(w1), R32(w1)
	mul	v0
	add	%rax, w3
	mov	(up), %rax		C up[n-1] again for the v1 product
	adc	%rdx, w0
	adc	R32(w1), R32(w1)	C w1 was zero, so this leaves w1 = CF
	mul	v1
	add	w3, (rp)		C rp[n-1] += lowest window limb
	adc	%rax, w0
	adc	%rdx, w1
	mov	w0, 8(rp)		C rp[n] is stored, not accumulated
	mov	w1, %rax		C return the most significant limb

	pop	%rbp
	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()