github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/invert_limb.asm (about) 1 dnl AMD64 mpn_invert_limb -- Invert a normalized limb. 2 3 dnl Contributed to the GNU project by Torbjorn Granlund and Niels Möller. 4 5 dnl Copyright 2004, 2007-2009, 2011, 2012 Free Software Foundation, Inc. 6 7 dnl This file is part of the GNU MP Library. 8 dnl 9 dnl The GNU MP Library is free software; you can redistribute it and/or modify 10 dnl it under the terms of either: 11 dnl 12 dnl * the GNU Lesser General Public License as published by the Free 13 dnl Software Foundation; either version 3 of the License, or (at your 14 dnl option) any later version. 15 dnl 16 dnl or 17 dnl 18 dnl * the GNU General Public License as published by the Free Software 19 dnl Foundation; either version 2 of the License, or (at your option) any 20 dnl later version. 21 dnl 22 dnl or both in parallel, as here. 23 dnl 24 dnl The GNU MP Library is distributed in the hope that it will be useful, but 25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27 dnl for more details. 28 dnl 29 dnl You should have received copies of the GNU General Public License and the 30 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31 dnl see https://www.gnu.org/licenses/. 
include(`../config.m4')


C mpn_invert_limb -- compute a reciprocal of the single limb d.
C
C A 16-bit starting estimate v0 is fetched from mpn_invert_limb_table and
C then refined in three steps (v1, v2, v3), followed by one final
C adjustment.  The formula for each step is given above the step.
C
C Input:   d is read from %rdi once FUNC_ENTRY(1) has run.  FUNC_ENTRY is
C          defined outside this file; presumably it places the argument in
C          %rdi for the DOS64 ABI as well -- TODO confirm.
C Output:  %rax.
C          NOTE(review): presumably floor((2^128 - 1) / d) - 2^64, as is
C          usual for this kind of reciprocal -- confirm against callers.
C Require: d normalized (most significant bit set), as the file header
C          states.  The table index below is d >> 55 with the table base
C          biased by -512 bytes, so an index below 256 would address memory
C          in front of the table.

C	     cycles/limb (approx)	div
C AMD K8,K9	 48			 71
C AMD K10	 48			 77
C Intel P4	135			161
C Intel core2	 69			116
C Intel corei	 55			 89
C Intel atom	129			191
C VIA nano	 79			157

C Registers touched by the code below:
C rax rcx rdx rdi rsi r8

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

PROTECT(`mpn_invert_limb_table')

C The three numeric columns in the trailing comments below are labelled
C Kn, C2 and Ci on the PROLOGUE line; presumably per-instruction schedule
C cycles for AMD K8-K10, Intel core2 and Intel corei -- TODO confirm.

C Table base: %r8 is set to mpn_invert_limb_table - 512 in one of three
C ways (PIC on Darwin through the GOT, other PIC builds rip-relative,
C non-PIC builds with an absolute address).  With 2-byte entries the bias
C of 512 bytes is 256 entries, so index 256 selects the first table entry.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_invert_limb)		C			Kn	C2	Ci
	FUNC_ENTRY(1)
	mov	%rdi, %rax		C			 0	 0	 0
	shr	$55, %rax		C			 1	 1	 1
ifdef(`PIC',`
ifdef(`DARWIN',`
	mov	mpn_invert_limb_table@GOTPCREL(%rip), %r8
	add	$-512, %r8
',`
	lea	-512+mpn_invert_limb_table(%rip), %r8
')',`
	movabs	$-512+mpn_invert_limb_table, %r8
')
	movzwl	(%r8,%rax,2), R32(%rcx)	C	%rcx = v0

	C v1 = (v0 << 11) - (v0*v0*d40 >> 40) - 1
	C d40 = (d >> 24) + 1.  v0*v0 is formed with a 32-bit multiply, the
	C product with d40 with a 64-bit multiply.  R32() comes from the
	C included macros; presumably it names the 32-bit register half.
	mov	%rdi, %rsi		C			 0	 0	 0
	mov	R32(%rcx), R32(%rax)	C			 4	 5	 5
	imul	R32(%rcx), R32(%rcx)	C			 4	 5	 5
	shr	$24, %rsi		C			 1	 1	 1
	inc	%rsi			C	%rsi = d40
	imul	%rsi, %rcx		C			 8	10	 8
	shr	$40, %rcx		C			12	15	11
	sal	$11, R32(%rax)		C			 5	 6	 6
	dec	R32(%rax)
	sub	R32(%rcx), R32(%rax)	C	%rax = v1

	C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
	C The constant loaded into %rcx is 2^60; %rsi still holds d40.
	mov	$0x1000000000000000, %rcx
	imul	%rax, %rsi		C			14	17	13
	sub	%rsi, %rcx
	imul	%rax, %rcx
	sal	$13, %rax
	shr	$47, %rcx
	add	%rax, %rcx		C	%rcx = v2

	C v3 = (v2 << 31) + (v2 * (2^96 - v2 * d63 + ((v2 >> 1) & mask)) >> 65)
	C mask is 0 or all ones, taken from the low bit of d via shr/sbb.
	C 2^96 is a multiple of 2^64, so it needs no instruction of its own
	C in the 64-bit subtraction.  The shift by 65 is done by keeping the
	C high limb of the 128-bit product and shifting it right once.
	mov	%rdi, %rsi		C			 0	 0	 0
	shr	%rsi			C d/2
	sbb	%rax, %rax		C -d0 = -(d mod 2)
	sub	%rax, %rsi		C d63 = ceil(d/2)
	imul	%rcx, %rsi		C v2 * d63
	and	%rcx, %rax		C v2 * d0
	shr	%rax			C (v2>>1) * d0
	sub	%rsi, %rax		C (v2>>1) * d0 - v2 * d63
	mul	%rcx
	sal	$31, %rcx
	shr	%rdx
	add	%rdx, %rcx		C	%rcx = v3

	C Final adjustment: result = v3 - (high limb of (d*v3 + d) + d),
	C all in 64-bit arithmetic.
	mov	%rdi, %rax
	mul	%rcx			C %rdx:%rax = d * v3
	add	%rdi, %rax		C low limb + d, only the carry is used
	mov	%rcx, %rax		C %rax = v3, mov leaves the carry flag intact
	adc	%rdi, %rdx		C %rdx = high limb + d + carry
	sub	%rdx, %rax		C %rax = v3 - %rdx, the return value

	FUNC_EXIT()
	ret
EPILOGUE()
ASM_END()