dnl  x86-64 mpn_lshift optimized for "Core 2".

dnl  Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 4.25
C AMD K10	 4.25
C Intel P4	14.7
C Intel core2	 1.27
C Intel NHM	 1.375	(up to about n = 260, then 1.5)
C Intel SBR	 1.87
C Intel atom	 ?
C VIA nano	 ?


C mpn_lshift -- shift an n-limb operand left by cnt bits.
C
C The source limbs are read from up, the shifted limbs are written to rp, and
C the bits shifted out of the highest-addressed source limb are returned in
C rax.  Limbs are processed from the highest address towards the lowest.
C
C Register use:
C   rdi (rp)	destination pointer, biased as described at each entry path
C   rsi (up)	source pointer, biased as described at each entry path
C   rdx (n)	limb count, turned into a loop counter by the entry paths
C   rcx (cnt)	shift count, only the low byte is used, by shld and shl
C   rax		n mod 4 during dispatch, then the return value
C   r8-r11	rolling window of source limbs
C No stack memory and no other registers are touched by the code below.
C
C NOTE(review): the GMP manual specifies 1 <= cnt < limb bits and n >= 1;
C nothing in this file checks either, so callers must guarantee them.

C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_lshift)
C NOTE(review): FUNC_ENTRY and FUNC_EXIT are defined by the included m4 files;
C presumably they adapt the DOS64 argument registers to the ones named above.
	FUNC_ENTRY(4)
	lea	-8(rp,n,8), rp		C rp = address of highest result limb
	lea	-8(up,n,8), up		C up = address of highest source limb

C Dispatch on n mod 4.  Every path loads the top limbs, forms the return value
C in rax, biases rp and up, and then enters the 4-way unrolled loop at the
C stage that matches the residue.
	mov	R32(%rdx), R32(%rax)	C %rdx is n
	and	$3, R32(%rax)
	jne	L(nb00)
L(b00):	C n = 4, 8, 12, ...
	mov	(up), %r10
	mov	-8(up), %r11
	xor	R32(%rax), R32(%rax)
	shld	R8(cnt), %r10, %rax	C rax = bits shifted out of the top limb
	mov	-16(up), %r8
	lea	24(rp), rp		C so that -24(rp) at L(00) is the top limb
	sub	$4, n
	jmp	L(00)

L(nb00):	C n mod 4 is 1, 2 or 3
	cmp	$2, R32(%rax)
	jae	L(nb01)
L(b01):	C n = 1, 5, 9, ...
	mov	(up), %r9
	xor	R32(%rax), R32(%rax)
	shld	R8(cnt), %r9, %rax	C rax = bits shifted out of the top limb
	sub	$2, n
	jb	L(le1)			C borrow means n was 1
	mov	-8(up), %r10
	mov	-16(up), %r11
	lea	-8(up), up
	lea	16(rp), rp		C so that -16(rp) at L(01) is the top limb
	jmp	L(01)
L(le1):	C n = 1: the single limb is in r9
	shl	R8(cnt), %r9
	mov	%r9, (rp)
	FUNC_EXIT()
	ret

L(nb01):	C n mod 4 is 2 or 3; the flags are still those of the cmp with 2
	jne	L(b11)
L(b10):	C n = 2, 6, 10, ...
	mov	(up), %r8
	mov	-8(up), %r9
	xor	R32(%rax), R32(%rax)
	shld	R8(cnt), %r8, %rax	C rax = bits shifted out of the top limb
	sub	$3, n
	jb	L(le2)			C borrow means n was 2
	mov	-16(up), %r10
	lea	-16(up), up
	lea	8(rp), rp		C so that -8(rp) at L(10) is the top limb
	jmp	L(10)
L(le2):	C n = 2: high limb in r8, low limb in r9
	shld	R8(cnt), %r9, %r8
	mov	%r8, (rp)
	shl	R8(cnt), %r9		C lowest limb is filled with zero bits
	mov	%r9, -8(rp)
	FUNC_EXIT()
	ret

	ALIGN(16)			C performance critical!
L(b11):	C n = 3, 7, 11, ...
	mov	(up), %r11
	mov	-8(up), %r8
	xor	R32(%rax), R32(%rax)
	shld	R8(cnt), %r11, %rax	C rax = bits shifted out of the top limb
	mov	-16(up), %r9
	lea	-24(up), up
	sub	$4, n
	jb	L(end)			C borrow means n was 3

C Main loop, four limbs per iteration.  Each stage merges a limb with the top
C bits of its lower neighbour, loads the next lower source limb, and stores
C the finished limb.  The limb registers rotate r11, r8, r9, r10.
	ALIGN(16)
L(top):	shld	R8(cnt), %r8, %r11
	mov	(up), %r10
	mov	%r11, (rp)
L(10):	shld	R8(cnt), %r9, %r8
	mov	-8(up), %r11
	mov	%r8, -8(rp)
L(01):	shld	R8(cnt), %r10, %r9
	mov	-16(up), %r8
	mov	%r9, -16(rp)
L(00):	shld	R8(cnt), %r11, %r10
	mov	-24(up), %r9
	mov	%r10, -24(rp)
	add	$-32, up
	lea	-32(rp), rp
	sub	$4, n
	jnc	L(top)			C loop until the counter borrows

C Wind down: the three lowest limbs are pending in r11, r8 and r9.
L(end):	shld	R8(cnt), %r8, %r11
	mov	%r11, (rp)
	shld	R8(cnt), %r9, %r8
	mov	%r8, -8(rp)
	shl	R8(cnt), %r9		C lowest limb is filled with zero bits
	mov	%r9, -16(rp)
	FUNC_EXIT()
	ret
EPILOGUE()